# Pandas Tutorial
[Corey Schafer's Playlist](https://www.youtube.com/playlist?list=PL-osiE80TeTsWmV9i9c58mdDCSskIFdDS)

## Part 1: Loading Data

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("https://raw.githubusercontent.com/ChrisMusson/FPL-ID-Map/main/Understat.csv")

In [3]:
df

Unnamed: 0,code,first_name,second_name,web_name,understat
0,1243,Robert,Green,Green,4390.0
1,1616,Alexander,Manninger,Manninger,277.0
2,1632,Gareth,Barry,Barry,590.0
3,1718,John,Terry,Terry,917.0
4,1801,Paul,Robinson,Robinson,1667.0
...,...,...,...,...,...
1763,536110,Josh,Feeney,Feeney,
1764,538207,William,Osula,Osula,
1765,547701,Archie,Gray,Gray,
1766,563883,Vincent,Angelini,Angelini,


In [4]:
df.head()

Unnamed: 0,code,first_name,second_name,web_name,understat
0,1243,Robert,Green,Green,4390.0
1,1616,Alexander,Manninger,Manninger,277.0
2,1632,Gareth,Barry,Barry,590.0
3,1718,John,Terry,Terry,917.0
4,1801,Paul,Robinson,Robinson,1667.0


In [5]:
df.tail()

Unnamed: 0,code,first_name,second_name,web_name,understat
1763,536110,Josh,Feeney,Feeney,
1764,538207,William,Osula,Osula,
1765,547701,Archie,Gray,Gray,
1766,563883,Vincent,Angelini,Angelini,
1767,573808,Jack,Grieves,Grieves,


In [6]:
df.shape

(1768, 5)

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1768 entries, 0 to 1767
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   code         1768 non-null   int64  
 1   first_name   1768 non-null   object 
 2   second_name  1768 non-null   object 
 3   web_name     1768 non-null   object 
 4   understat    1368 non-null   float64
dtypes: float64(1), int64(1), object(3)
memory usage: 69.2+ KB


## Part 2: Selecting Rows & Columns

In [8]:
# df.first_name will give same output
df["first_name"]

0          Robert
1       Alexander
2          Gareth
3            John
4            Paul
          ...    
1763         Josh
1764      William
1765       Archie
1766      Vincent
1767         Jack
Name: first_name, Length: 1768, dtype: object

In [9]:
type(df["first_name"])

pandas.core.series.Series

In [10]:
# DataFrame -> rows & column
# Series -> column

In [11]:
df[["first_name", "second_name"]]

Unnamed: 0,first_name,second_name
0,Robert,Green
1,Alexander,Manninger
2,Gareth,Barry
3,John,Terry
4,Paul,Robinson
...,...,...
1763,Josh,Feeney
1764,William,Osula
1765,Archie,Gray
1766,Vincent,Angelini


In [12]:
type(df[["first_name", "second_name"]])

pandas.core.frame.DataFrame

In [13]:
df.columns

Index(['code', 'first_name', 'second_name', 'web_name', 'understat'], dtype='object')

In [14]:
df.iloc[0]

code             1243
first_name     Robert
second_name     Green
web_name        Green
understat      4390.0
Name: 0, dtype: object

In [15]:
df.iloc[[0, 1], [1, 2]]

Unnamed: 0,first_name,second_name
0,Robert,Green
1,Alexander,Manninger


In [16]:
df.loc[0]

code             1243
first_name     Robert
second_name     Green
web_name        Green
understat      4390.0
Name: 0, dtype: object

In [17]:
df.loc[[0, 1], ["first_name", "second_name"]]

Unnamed: 0,first_name,second_name
0,Robert,Green
1,Alexander,Manninger


In [18]:
for row in df.values:
    print(row)

[1243 'Robert' 'Green' 'Green' 4390.0]
[1616 'Alexander' 'Manninger' 'Manninger' 277.0]
[1632 'Gareth' 'Barry' 'Barry' 590.0]
[1718 'John' 'Terry' 'Terry' 917.0]
[1801 'Paul' 'Robinson' 'Robinson' 1667.0]
[1822 'Shay' 'Given' 'Given' 889.0]
[2404 'Michael' 'Carrick' 'Carrick' 654.0]
[2513 'Jamie' 'Murphy' 'Murphy' 6052.0]
[3201 'Stuart' 'Taylor' 'Taylor' nan]
[3736 'John' "O'Shea" "O'Shea" 729.0]
[3773 'Peter' 'Crouch' 'Crouch' 872.0]
[5288 'Gerhard' 'Tremmel' 'Tremmel' 1062.0]
[5589 'Dean' 'Whitehead' 'Whitehead' 6328.0]
[6744 'Lee' 'Grant' 'Grant' 1742.0]
[7525 'Steven' 'Pienaar' 'Pienaar' 924.0]
[7551 'Joleon' 'Lescott' 'Lescott' 664.0]
[7638 'Mark' 'Hudson' 'Hudson' nan]
[7645 'Phil' 'Jagielka' 'Jagielka' 587.0]
[7906 'Damien' 'Delaney' 'Delaney' 511.0]
[7958 'Jermain' 'Defoe' 'Defoe' 735.0]
[8380 'James' 'Collins' 'Collins' 655.0]
[8432 'José' 'Reina' 'Reina' 1374.0]
[9047 'Glen' 'Johnson' 'Johnson' 944.0]
[9089 'Ben' 'Foster' 'Foster' 803.0]
[9110 'Shaun' 'Maloney' 'Maloney' 1692

In [19]:
srs_first_name = df["first_name"]
srs_first_name

0          Robert
1       Alexander
2          Gareth
3            John
4            Paul
          ...    
1763         Josh
1764      William
1765       Archie
1766      Vincent
1767         Jack
Name: first_name, Length: 1768, dtype: object

In [23]:
# get index from series based on matching element
srs_first_name[srs_first_name == "Samir"].index[0]

165

In [24]:
df.columns

Index(['code', 'first_name', 'second_name', 'web_name', 'understat'], dtype='object')

In [21]:
df.loc[165]

code           28554
first_name     Samir
second_name    Nasri
web_name       Nasri
understat      877.0
Name: 165, dtype: object

In [25]:
df.loc[0:1, "first_name": "web_name"]

Unnamed: 0,first_name,second_name,web_name
0,Robert,Green,Green
1,Alexander,Manninger,Manninger
