## Setup

In [1]:
import pandas as pd
import plotly_express as px

## Input

In [2]:
# Google Sheets "export as CSV" address for the worksheet holding the data.
# The address is split across adjacent string literals only to keep the
# lines short; Python joins them back into one string.
url = (
    'https://docs.google.com/spreadsheets/d/'
    '1peJis68KbNsD1jKwW32g3VOGCqMmdfjwGdGcpeukli8'
    '/export?format=csv&gid=1955284808'
)
# Read the exported CSV straight from the URL into a DataFrame.
hoops_df = pd.read_csv(filepath_or_buffer=url)

## Process

### Review the data

#### info()

In [3]:
# Print a summary of the DataFrame: number of rows, each column's name,
# non-null count and dtype, plus the approximate memory usage.
hoops_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 494 entries, 0 to 493
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Timestamp             494 non-null    object
 1   First Name            494 non-null    object
 2   Shot Distance (feet)  494 non-null    int64 
 3   Shot Made?            494 non-null    bool  
dtypes: bool(1), int64(1), object(2)
memory usage: 12.2+ KB


#### head()

In [4]:
# Preview the first five rows (five is also what head() returns by default).
display(hoops_df.head(n=5))

Unnamed: 0,Timestamp,First Name,Shot Distance (feet),Shot Made?
0,10/18/2023 10:51:01,David,10,False
1,10/18/2023 10:53:00,David,10,True
2,10/18/2023 13:38:16,MG,8,True
3,10/18/2023 13:38:25,MG,8,False
4,10/18/2023 13:38:33,MG,8,False


#### tail()

In [5]:
# Preview the last five rows (five is also what tail() returns by default).
display(hoops_df.tail(n=5))

Unnamed: 0,Timestamp,First Name,Shot Distance (feet),Shot Made?
489,11/28/2023 14:20:35,Julia,4,False
490,11/28/2023 14:20:36,Kvitka,4,False
491,11/28/2023 14:20:48,J.Ho,4,True
492,11/28/2023 14:20:53,Kvitka,4,True
493,11/28/2023 14:24:23,Nico,4,False


#### columns

Lesson: https://github.com/pbeens/Data-Analysis/blob/main/BADS/01-Intro/01-01-columns.ipynb

In [19]:
# Show the Index object that holds the DataFrame's column labels.
display(hoops_df.columns)

Index(['Timestamp', 'First Name', 'Shot Distance (feet)', 'Shot Made?'], dtype='object')

Here's another way to look at the column names, this time using a Python `for` loop to print one name per line.

In [20]:
# Print each column label on its own line.
# The loop variable is deliberately not called `name`: a top-level loop
# variable stays defined after the loop finishes, and `name` is the variable
# the filtering and charting cells use for the selected student. Reusing it
# here would leave `name` set to the last column label if this cell were run
# after the student had been chosen.
for column_name in hoops_df.columns:
    print(column_name)

Timestamp
First Name
Shot Distance (feet)
Shot Made?


#### sorting the data

Lesson: https://github.com/pbeens/Data-Analysis/blob/main/BADS/01-Intro/01-03-sorting-data.ipynb

In [11]:
# Order the rows by first name, then by shot distance within each name
# (both ascending).
# The sorted result is assigned back to `hoops_df` rather than sorting with
# inplace=True: the object that was loaded is not mutated, and every later
# cell that reads `hoops_df` still sees the sorted rows.
hoops_df = hoops_df.sort_values(['First Name', 'Shot Distance (feet)'],
                                ascending=[True, True])

# Text sorting is sensitive to case and to leading spaces, so entries such as
# ' Hope' and 'naya' end up at the very top and bottom of the sorted frame.
display(hoops_df.head())
display(hoops_df.tail())

Unnamed: 0,Timestamp,First Name,Shot Distance (feet),Shot Made?
241,11/14/2023 8:21:43,Hope,10,True
463,11/28/2023 13:11:19,AA_Milne,2,False
464,11/28/2023 13:11:28,AA_Milne,2,False
465,11/28/2023 13:11:37,AA_Milne,2,False
466,11/28/2023 13:11:45,AA_Milne,2,True


Unnamed: 0,Timestamp,First Name,Shot Distance (feet),Shot Made?
445,11/23/2023 8:50:33,naya,6,True
370,11/14/2023 9:25:34,naya,8,True
410,11/23/2023 8:41:24,naya,8,False
371,11/14/2023 9:25:44,naya,10,False
435,11/23/2023 8:45:57,naya,10,True


#### unique values

In [22]:
# List every distinct value in the 'First Name' column once, in the order the
# values first appear in the frame.
display(pd.unique(hoops_df['First Name']))

array([' Hope', 'AD', 'AT', 'Adrianna', 'Alejandro', 'Alina', 'Anna',
       'Anna ', 'Annamaria', 'Ava', 'Ava ', 'Barry', 'Benjie',
       'Bernadette ', 'Bill', 'Bill ', 'Brad', 'Breanna ', 'C.Z.',
       'Carolyn ', 'Daryl', 'Dave', 'David', 'David ', 'Dean', 'Dennis',
       'Dirk', 'Divine', 'Don', 'Dustin', 'Dylan ', 'Edgar', 'Francisco',
       'Geoff', 'Grant', 'Grant ', 'Greg', 'Grorge', 'Gurm', 'Heoff',
       'Hope', 'Hope ', 'I', 'IT dept', 'Imran ', 'Ishruna', 'J.G', 'JE',
       'Jessica', 'Jether', 'Jether ', 'Jhustin', 'Jhustin ', 'John',
       'Jordan', 'Joshua G', 'Josip', 'Julie', 'KELVIN', 'Kal', 'Kate',
       'Kelly', 'Kevin', 'Kirsten', 'Kobe', 'Kylie', 'LN', 'Lebron',
       'Lesley', 'MG', 'MPT', 'Mac', 'Marco', 'Marina C', 'Marjoire',
       'Marjorie', 'Marjorie ', 'Mark', 'Meagan', 'Michelle', 'Mike',
       'NP', 'Nathan', 'Naya', 'Nico', 'Omar', 'PB', 'Pooja', 'R.M',
       'Rebecca', 'Riley', 'Rolland', 'Ryan', 'SB', 'SIN 608 126 645',
       'Scooter', 

### Clean up the data

#### filtering the data (by name)

Lesson: https://github.com/pbeens/Data-Analysis/blob/main/BADS/01-Intro/01-02-filtering-data.ipynb

Notice that we're putting the filtered rows in a new DataFrame named `student_df`, so the original `hoops_df` is left unchanged.

In [12]:
# Student whose shots we want to look at; the chart titles further down reuse
# this variable.
name = 'Adrianna'

# Boolean mask that is True for the rows belonging to that student.
# - Surrounding whitespace is stripped before comparing, because the raw
#   entries contain variants such as 'Anna' / 'Anna ' that an exact match
#   would silently leave out. The comparison is still case-sensitive.
# - The mask is not called `filter`, so Python's built-in filter() stays
#   usable in the rest of the notebook.
name_mask = hoops_df['First Name'].str.strip() == name

# Boolean indexing produces a new DataFrame; order it by shot distance.
student_df = hoops_df[name_mask].sort_values('Shot Distance (feet)')

display(student_df)

Unnamed: 0,Timestamp,First Name,Shot Distance (feet),Shot Made?
254,11/14/2023 8:22:38,Adrianna,2,True
335,11/14/2023 9:01:32,Adrianna,2,True
383,11/14/2023 11:15:23,Adrianna,2,True
404,11/23/2023 8:40:48,Adrianna,2,True
239,11/14/2023 8:21:16,Adrianna,4,True
426,11/23/2023 8:44:24,Adrianna,4,True
424,11/23/2023 8:43:27,Adrianna,6,True
400,11/23/2023 8:40:15,Adrianna,6,True
382,11/14/2023 11:15:17,Adrianna,6,True
377,11/14/2023 11:11:27,Adrianna,6,True


## Output

In [25]:
# Percentage of shots made at each distance: the mean of a True/False column
# is the fraction of True values, which is then scaled to 0-100.
grouped = (
    student_df
    .groupby('Shot Distance (feet)')['Shot Made?']
    .mean()
    .mul(100)
)

# Bar chart with the distances (the index of `grouped`) on the x-axis and the
# percentages on the y-axis.
fig = px.bar(
    grouped,
    x=grouped.index,
    y='Shot Made?',
    title=f'Percentage of Shots Made by {name} versus Distance',
)
# Replace the default y-axis label (the column name) with a clearer one.
fig.update_layout(yaxis_title='Shots Made (%)')
fig.show()

In [26]:
# A more compact way to build the same kind of chart: the percentage Series
# is passed directly to px.bar with no x/y arguments, leaving Plotly to take
# the bar positions from the Series' index and the bar heights from its values.
fig = px.bar(
    student_df.groupby('Shot Distance (feet)')['Shot Made?'].mean().mul(100),
    title=f'Percentage of Shots Made by {name} versus Distance',
)
# Give the y-axis a descriptive label.
fig.update_yaxes(title_text="Percentage of Shots Made")
fig.show()