# Dataframes

In [1]:
import pandas as pd
# Load lsd_math_score_data.csv into a DataFrame. The path is relative, so it
# is resolved against the kernel's current working directory.
data = pd.read_csv(filepath_or_buffer='lsd_math_score_data.csv')

In [2]:
# A bare expression on the last line of a cell is rendered as the cell's output.
data

Unnamed: 0,Time_Delay_in_Minutes,LSD_ppm,Avg_Math_Test_Score
0,5,1.17,78.93
1,15,2.97,58.2
2,30,3.26,67.47
3,60,4.69,37.47
4,120,5.83,45.65
5,240,6.0,32.92
6,480,6.41,29.97


In [3]:
# Check what kind of object read_csv handed back.
type(data)

pandas.core.frame.DataFrame

A dataframe is a collection with a clearly defined structure.
It's structured in rows and columns, just like a spreadsheet.

You can select a single column with the following syntax:  
`dataframe_name['column_name']`

In [4]:
# Square-bracket indexing with a single column name pulls out that one column.
onlyMathScores = data['Avg_Math_Test_Score']

In [5]:
# Show the selected column together with its index, Name and dtype.
onlyMathScores

0    78.93
1    58.20
2    67.47
3    37.47
4    45.65
5    32.92
6    29.97
Name: Avg_Math_Test_Score, dtype: float64

## Adding Columns


In [6]:
# Assigning to a column name that does not exist yet creates the column;
# a scalar value is repeated on every row.
data['Test_Subject'] = 'Jennifer Lopez'
data

Unnamed: 0,Time_Delay_in_Minutes,LSD_ppm,Avg_Math_Test_Score,Test_Subject
0,5,1.17,78.93,Jennifer Lopez
1,15,2.97,58.2,Jennifer Lopez
2,30,3.26,67.47,Jennifer Lopez
3,60,4.69,37.47,Jennifer Lopez
4,120,5.83,45.65,Jennifer Lopez
5,240,6.0,32.92,Jennifer Lopez
6,480,6.41,29.97,Jennifer Lopez


In [7]:
# Derive a new column from an existing one: the addition is applied
# element-wise to every row of Avg_Math_Test_Score.
data['High_Score'] = data['Avg_Math_Test_Score'] + 100
data

Unnamed: 0,Time_Delay_in_Minutes,LSD_ppm,Avg_Math_Test_Score,Test_Subject,High_Score
0,5,1.17,78.93,Jennifer Lopez,178.93
1,15,2.97,58.2,Jennifer Lopez,158.2
2,30,3.26,67.47,Jennifer Lopez,167.47
3,60,4.69,37.47,Jennifer Lopez,137.47
4,120,5.83,45.65,Jennifer Lopez,145.65
5,240,6.0,32.92,Jennifer Lopez,132.92
6,480,6.41,29.97,Jennifer Lopez,129.97


In [8]:
# Overwrite High_Score with its squared value, computed directly from the
# source column. Squaring High_Score in place (x = x ** 2) would square the
# column again every time this cell is re-run; this form produces the same
# values no matter how many times the cell runs.
data['High_Score'] = (100 + data['Avg_Math_Test_Score']) ** 2
data

Unnamed: 0,Time_Delay_in_Minutes,LSD_ppm,Avg_Math_Test_Score,Test_Subject,High_Score
0,5,1.17,78.93,Jennifer Lopez,32015.9449
1,15,2.97,58.2,Jennifer Lopez,25027.24
2,30,3.26,67.47,Jennifer Lopez,28046.2009
3,60,4.69,37.47,Jennifer Lopez,18898.0009
4,120,5.83,45.65,Jennifer Lopez,21213.9225
5,240,6.0,32.92,Jennifer Lopez,17667.7264
6,480,6.41,29.97,Jennifer Lopez,16892.2009


In [9]:
# Floating-point results should be compared with a tolerance rather than ==:
# exact equality only holds when both sides happen to round identically.
# .loc[row_label, column_name] reads the single cell in one indexing step.
expected_high_score = 178.93 ** 2
print(abs(data.loc[0, 'High_Score'] - expected_high_score) < 1e-9)

True


In [10]:
# Read the first row's High_Score. The unrounded repr exposes the
# floating-point representation error that the table display hides.
data.loc[0, 'High_Score']

32015.944900000002

## Series
In a dataframe, each column is actually a Series, so a dataframe is a collection of Series.  
A Series is only ever one column, unlike an array or list; it is strict about this. It does have a `name` attribute, which appears as `Name:` in the output below.

In [11]:
# A single column selected from a DataFrame is a Series, not a DataFrame.
type(onlyMathScores)

pandas.core.series.Series

In [12]:
# Display the Series again: one column of values plus its index, Name and dtype.
onlyMathScores

0    78.93
1    58.20
2    67.47
3    37.47
4    45.65
5    32.92
6    29.97
Name: Avg_Math_Test_Score, dtype: float64

In [13]:
# A plain Python list holding the names of the columns to keep.
columnList = ['LSD_ppm', 'Avg_Math_Test_Score']
columnList

['LSD_ppm', 'Avg_Math_Test_Score']

In [14]:
# Indexing with a list of column names returns a DataFrame containing only
# those columns.
cleanData = data[columnList]
cleanData

Unnamed: 0,LSD_ppm,Avg_Math_Test_Score
0,1.17,78.93
1,2.97,58.2
2,3.26,67.47
3,4.69,37.47
4,5.83,45.65
5,6.0,32.92
6,6.41,29.97


In [15]:
# Same selection as data[columnList], with the list written inline. Note the
# double brackets: the outer pair indexes the DataFrame, the inner pair is the
# list literal.
data[['LSD_ppm', 'Avg_Math_Test_Score']]

Unnamed: 0,LSD_ppm,Avg_Math_Test_Score
0,1.17,78.93
1,2.97,58.2
2,3.26,67.47
3,4.69,37.47
4,5.83,45.65
5,6.0,32.92
6,6.41,29.97


To get a one-column dataframe (rather than a Series), wrap the single column name in a list:

In [16]:
# Double brackets (a one-element list) keep the result a DataFrame
# rather than a Series.
y = data[['Avg_Math_Test_Score']]
type(y)

pandas.core.frame.DataFrame

In [17]:
# One-element list selection: X is a single-column DataFrame.
X = data[['LSD_ppm']]
# print() is used because only the last expression of a cell is displayed
# automatically; it emits the plain-text form of the DataFrame.
print(X)
type(X)

   LSD_ppm
0     1.17
1     2.97
2     3.26
3     4.69
4     5.83
5     6.00
6     6.41


pandas.core.frame.DataFrame

## Deleting a Column
`del dataframe_name['column_name']`

In [18]:
# Remove the two demo columns added earlier, leaving the original columns.
# `del` mutates `data` in place, so re-running this cell fails once the
# columns are gone.
del data['Test_Subject']
del data['High_Score']
data

Unnamed: 0,Time_Delay_in_Minutes,LSD_ppm,Avg_Math_Test_Score
0,5,1.17,78.93
1,15,2.97,58.2
2,30,3.26,67.47
3,60,4.69,37.47
4,120,5.83,45.65
5,240,6.0,32.92
6,480,6.41,29.97


In [19]:
# NOTE(review): my_module is presumably a local .py file that must be
# importable from the kernel's working directory -- confirm.
import my_module
# An imported module is itself an object, of type `module`.
type(my_module)

module

In [20]:
# Read the `theAnswer` attribute of the imported module.
my_module.theAnswer

42