In [141]:
import pandas as pd
import numpy as np

## Series and DataFrames

# Start from a plain dictionary that maps each student's name to an
# ID number (a list would work as a starting point, too)

idnumber = dict([
    ("Carlos Goes", "06/99209"),
    ("Nicolas Powidayko", "10/22290"),
    ("Alexander Rabbat", "08/21346"),
    ("Dani Alaino", "07/20345"),
    ("Lya Nikate", "09/23567"),
    ("Niz Borroz", "11/22035"),
    ("Tom Rundal", "98/20145"),
])

# Turn the dictionary into a Pandas Series: the keys become the index
# labels and the values become the data

idnumber = pd.Series(data=idnumber)

In [142]:
# A Series can be read in two ways:
#   ".loc"  looks a value up by its index label (here, the student's name)
#   ".iloc" looks a value up by its integer position

idnumber.loc['Carlos Goes']

'06/99209'

In [143]:
# ".iloc" is positional and zero-based: 1 is the second entry of the Series.
# NOTE(review): which student sits at position 1 depends on the order of the
#   index. The saved output ('06/99209', i.e. Carlos Goes) matches an
#   alphabetically sorted index -- confirm against the pandas version in use.
idnumber.iloc[1]

'06/99209'

In [144]:
# A Series can also be built in a single step, straight from a dict literal

major = pd.Series(data={
    "Carlos Goes": "Economics",
    "Nicolas Powidayko": "Economics",
    "Alexander Rabbat": "International Affairs",
    "Dani Alaino": "International Affairs",
    "Lya Nikate": "International Affairs",
    "Niz Borroz": "International Affairs",
    "Tom Rundal": "Economics",
})

gpa = pd.Series(data={
    "Carlos Goes": 4.0,
    "Nicolas Powidayko": 3.8,
    "Alexander Rabbat": 2.8,
    "Dani Alaino": 3.4,
    "Lya Nikate": 3.3,
    "Niz Borroz": 2.0,
    "Tom Rundal": 3.0,
})

In [145]:
# Combine the three Series into a single DataFrame, one Series per row
#   (a DataFrame is basically a table made of a bunch of columns)

df = pd.DataFrame(
    data=[idnumber, major, gpa],
    index=['idnumber', 'major', 'gpa'],
)
df

Unnamed: 0,Alexander Rabbat,Carlos Goes,Dani Alaino,Lya Nikate,Nicolas Powidayko,Niz Borroz,Tom Rundal
idnumber,08/21346,06/99209,07/20345,09/23567,10/22290,11/22035,98/20145
major,International Affairs,Economics,International Affairs,International Affairs,Economics,International Affairs,Economics
gpa,2.8,4,3.4,3.3,3.8,2,3


In [146]:
# Interesting, but is there a better way to look at these data?
#   Let's transpose the dataframe, so that each student becomes a row
#   and 'idnumber', 'major' and 'gpa' become the columns.

# "df = df.T" on its own flips the table back every time the cell is
# re-run; only transpose while the table is still in its wide layout.
if 'gpa' in df.index:
    df = df.T

# Each row of the wide table mixed strings and numbers, so every column
# ended up with dtype=object. Give the GPA column a numeric dtype again.
df['gpa'] = pd.to_numeric(df['gpa'])
df

Unnamed: 0,idnumber,major,gpa
Alexander Rabbat,08/21346,International Affairs,2.8
Carlos Goes,06/99209,Economics,4.0
Dani Alaino,07/20345,International Affairs,3.4
Lya Nikate,09/23567,International Affairs,3.3
Nicolas Powidayko,10/22290,Economics,3.8
Niz Borroz,11/22035,International Affairs,2.0
Tom Rundal,98/20145,Economics,3.0


In [147]:
# Now you can retrieve data in different ways

#   Using ".loc" with an index label returns that whole row as a Series
#   (its entries are labelled by the column names)

df.loc['Carlos Goes']

idnumber     06/99209
major       Economics
gpa                 4
Name: Carlos Goes, dtype: object

In [148]:
# You can also ask ".loc" for a single cell: row label first, column name second

df.loc['Carlos Goes', 'idnumber']

'06/99209'

In [149]:
# Using plain brackets the order is reversed: the first pair selects the
# column (a Series), the second pair selects one index label in that Series.
# This chained form is fine for reading a value; for assigning one, prefer
# the single-call form df.loc[row, column].

df['idnumber']['Carlos Goes']

'06/99209'

In [150]:
# Boolean masking
#   Comparing a column with a value yields a Series of True/False,
#   one entry per row, aligned on the same index

(df['major'] == 'Economics')

Alexander Rabbat     False
Carlos Goes           True
Dani Alaino          False
Lya Nikate           False
Nicolas Powidayko     True
Niz Borroz           False
Tom Rundal            True
Name: major, dtype: bool

In [151]:
# Passing the boolean mask back into df[...] keeps only the rows where it is True
df[df['major'] == 'Economics']

Unnamed: 0,idnumber,major,gpa
Carlos Goes,06/99209,Economics,4.0
Nicolas Powidayko,10/22290,Economics,3.8
Tom Rundal,98/20145,Economics,3.0


In [152]:
# The same pattern works with numeric comparisons: rows with a GPA of at most 3
df[df['gpa'] <= 3]

Unnamed: 0,idnumber,major,gpa
Alexander Rabbat,08/21346,International Affairs,2.8
Niz Borroz,11/22035,International Affairs,2.0
Tom Rundal,98/20145,Economics,3.0


In [153]:
# Arithmetic on a column is applied to every entry at once: double each GPA.
# The column is overwritten, so running this cell again doubles it again.
df['gpa'] = df['gpa'] * 2
df

Unnamed: 0,idnumber,major,gpa
Alexander Rabbat,08/21346,International Affairs,5.6
Carlos Goes,06/99209,Economics,8.0
Dani Alaino,07/20345,International Affairs,6.8
Lya Nikate,09/23567,International Affairs,6.6
Nicolas Powidayko,10/22290,Economics,7.6
Niz Borroz,11/22035,International Affairs,4.0
Tom Rundal,98/20145,Economics,6.0


In [154]:
# Halve every GPA, undoing the doubling applied to the column just before
df['gpa'] = df['gpa'] / 2
df

Unnamed: 0,idnumber,major,gpa
Alexander Rabbat,08/21346,International Affairs,2.8
Carlos Goes,06/99209,Economics,4.0
Dani Alaino,07/20345,International Affairs,3.4
Lya Nikate,09/23567,International Affairs,3.3
Nicolas Powidayko,10/22290,Economics,3.8
Niz Borroz,11/22035,International Affairs,2.0
Tom Rundal,98/20145,Economics,3.0


In [155]:
# Mean GPA computed by hand: the sum of the column divided by its number of entries
np.sum(df['gpa'])/len(df['gpa'])

3.185714285714286

In [156]:
# The same mean in a single call
np.mean(df['gpa'])

3.1857142857142859

In [157]:
%timeit 10

# Iterable

for name in df['major'].unique():
    avg = np.mean(df['gpa'][df['major'] == name])
    print('Mean GPA, ' + name + ': ' + str(avg))

100000000 loops, best of 3: 8.25 ns per loop
Mean GPA, International Affairs: 2.875
Mean GPA, Economics: 3.6


In [158]:
%timeit 10

# Using groups by

for name, table in df.groupby('major'):
    avg = np.mean(table['gpa'])
    print('Mean GPA, ' + name + ': ' + str(avg))

100000000 loops, best of 3: 7.15 ns per loop
Mean GPA, Economics: 3.6
Mean GPA, International Affairs: 2.875


In [172]:
# Summarise GPA per major: one row per major, one column per statistic.
# A list of function names is used rather than a {column name: function}
# dict: passing such a renaming dict to .agg() on a grouped Series was
# deprecated and later removed from pandas (it raises SpecificationError).
# The list yields the same columns, in the order given.
maxmin = (df.groupby('major')['gpa']
            .agg(['max', 'min', 'count', 'sum']))

# Mean computed by hand from the aggregated columns
maxmin['mean'] = maxmin['sum'] / maxmin['count']

maxmin

Unnamed: 0_level_0,max,min,count,sum,mean
major,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Economics,4.0,3.0,3,10.8,3.6
International Affairs,3.4,2.0,4,11.5,2.875
