In [125]:
# Pandas imports
from pandas import Series, DataFrame, MultiIndex

In [92]:
# Mock data shared by the examples below. The three lists are aligned by
# position: element i of each list describes the same year.
years = ['2018', '2019', '2020']  # row labels, kept as strings
population = [326.69, 328.24, 329.46]  # presumably millions of people -- TODO confirm unit
known_for = ['party', 'birth', 'war']

In [95]:
# A Series is a one-dimensional labelled array: `data` holds the values,
# `index` the label for each value and `name` a title for the whole series.
series = Series(data=population, index=years, name='Population of America')
print(series)
print(series * 1000000)  # arithmetic is applied element-wise to every data point
# The index labels are strings, so positional access goes through .iloc;
# a bare series[2] relies on an integer-as-position fallback that newer
# pandas versions deprecate.
print(series.iloc[2])   # grabbing a specific data point by position
print(series.iloc[1:])  # grabbing a positional range - python slicing

2018    326.69
2019    328.24
2020    329.46
Name: Population of America, dtype: float64
2018    326690000.0
2019    328240000.0
2020    329460000.0
Name: Population of America, dtype: float64
329.46
2019    328.24
2020    329.46
Name: Population of America, dtype: float64


-----------------

# DataFrame

In [99]:
# Build the frame from a list of lists: every inner list is one row,
# `index` labels the rows and `columns` labels the positions inside a row.
the_years = DataFrame(
    data=[
        [326.69, 'party'],
        [328.24, 'birth'],
        [329.46, 'war'],
    ],
    index=years,
    columns=['Population', 'Known For'],
)
the_years

Unnamed: 0,Population,Known For
2018,326.69,party
2019,328.24,birth
2020,329.46,war


In [105]:
# Build the frame from a dict: each key becomes a column name and its list
# supplies that column's values, while `index` names the rows.
#
# The value lists are expected to have the same length (a mismatch raises an
# error). The exception demonstrated here is a one-element list, whose single
# value is repeated for every row -- see the 'Test' column.
the_years = DataFrame(
    data={
        'Population': population,
        'Known For': known_for,
        'Test': ['Javi'],
    },
    index=years,
)
the_years

Unnamed: 0,Population,Known For,Test
2018,326.69,party,Javi
2019,328.24,birth,Javi
2020,329.46,war,Javi


In [115]:
# Build the frame from a list of dicts: each dict is one row, added in order,
# and its keys name the columns that row fills in.
# - A key that appears only in some dicts (like 'Test' below) still creates a
#   column; rows that did not supply it are filled with NaN/null.
# - A list passed as a dict value is not spread over rows: the whole list is
#   stored in that single cell (see the last record).
# No `index` is passed, so the rows get the default labels 0..3; the three
# `years` labels could not be used here because there are four records.
the_years = DataFrame(
    data=[
        {'Population': population[0], 'Known For': known_for[1]},
        {'Population': population[1], 'Known For': known_for[0]},
        {'Population': population[2], 'Known For': known_for[2], 'Test': 'Javi!!'},
        {'Population': population},
    ]
)
the_years

Unnamed: 0,Population,Known For,Test
0,326.69,birth,
1,328.24,party,
2,329.46,war,Javi!!
3,"[326.69, 328.24, 329.46]",,


In [122]:
# Build the frame from a dict of Series: the dict key names the column and
# each Series' own index decides which row every value lands in.
# When a Series carries index labels the other Series do not have, those
# labels become extra rows. 'Test' is given no index, so its single value
# gets the default label 0: a new row 0 appears, and every cell without a
# matching label is left empty (NaN).
the_years = DataFrame(
    data={
        'Population': Series(data=population, index=years),
        'Known For': Series(data=known_for, index=years),
        'Test': Series(data=['Javi']),
    }
)
the_years

Unnamed: 0,Population,Known For,Test
2018,326.69,party,
2019,328.24,birth,
2020,329.46,war,
0,,,Javi


In [78]:
# Small example frame: one column per show, one labelled row per character role.
model = {
    'IASIP': ['Frank', 'Charlie', 'Mac'],
    'HIMYM': ['Barney', 'Ted', 'Lily'],
}
shows = DataFrame(data=model, index=['Funny', 'Main', 'Gay?'])
shows

Unnamed: 0,IASIP,HIMYM
Funny,Frank,Barney
Main,Charlie,Ted
Gay?,Mac,Lily


In [79]:
# Selecting a single column by name returns it as a Series.
# Explicit form of the shorthand shows['HIMYM']: all rows, one column.
shows.loc[:, 'HIMYM']

Funny    Barney
Main        Ted
Gay?       Lily
Name: HIMYM, dtype: object

In [85]:
# Selecting rows by label: pass .loc a list of the row labels to keep
# (all columns are returned).
shows.loc[['Funny', 'Gay?'], :]

Unnamed: 0,IASIP,HIMYM
Funny,Frank,Barney
Gay?,Mac,Lily


In [86]:
# Selecting a range of rows by label with .loc; as the output shows, the
# end label ('Gay?') is included in the result.
shows.loc['Main':'Gay?', :]

Unnamed: 0,IASIP,HIMYM
Main,Charlie,Ted
Gay?,Mac,Lily


In [124]:
# Selecting a range of rows by integer position with .iloc; like regular
# python slicing, the stop position (2) is excluded.
shows.iloc[0:2, :]

Unnamed: 0,IASIP,HIMYM
Funny,Frank,Barney
Main,Charlie,Ted


In [132]:
# Two-level row index: the outer level is the company, the inner level the year.
outside_row = ['Apple', 'Apple', 'Google', 'Google']
inside_row = ['2019', '2020', '2019', '2020']
# Pair the levels position by position into (company, year) tuples and turn
# those tuples into a MultiIndex.
multi_index = MultiIndex.from_tuples(list(zip(outside_row, inside_row)))
companies = DataFrame(
    data={
        'Volume': [100, 110, 91, 98],
        'EPS': [22, 23, 27, 28.5],
    },
    index=multi_index,
)
companies

Unnamed: 0,Unnamed: 1,Volume,EPS
Apple,2019,100,22.0
Apple,2020,110,23.0
Google,2019,91,27.0
Google,2020,98,28.5
