In [1]:
import pandas as pd

### Creating a Dataframe from a list of lists
pd.DataFrame(listName, columns=[])

In [35]:
# One inner list per row, in column order: Name, Age, School, Partner.
# None marks a missing value.
sample_data = [
    ["Theseus", 20, "Tufts", None],
    ["Keagan", 21, "UofM", "Darin"],
    ["Jimmy", 21, "Tufts", "Alyssa"],
    ["Piplup", 2, None, None],
]
df = pd.DataFrame(data=sample_data, columns=["Name", "Age", "School", "Partner"])

### Changing dtype
`astype('<dtype>')` - where the dtype is e.g. 'int', 'object', 'bool', etc.

You can pass `errors='ignore'`: if the conversion fails, the original (unconverted) object is returned instead of an error being raised.

In [4]:
# The names are non-numeric strings, so the int cast cannot succeed;
# errors='ignore' suppresses the failure and the original object-dtype
# Series comes back unchanged.
df['Name'].astype(int, errors='ignore')

0    Theseus
1     Keagan
2      Jimmy
3     Piplup
Name: Name, dtype: object

### Check if column name exists

In [5]:
# The membership test works against df.columns or against df itself.
# 'Nam' is not one of the column labels, so this prints False.
print('Nam' in df.columns)

False


### Query
Takes something like:      df[(df['International plan'] == 'No') & (df['Churn'] == False)]  
And simplifies it to something like:  df.query("`International plan` == 'No' & Churn == False")

In [5]:
# Is NOT empty: keep only the rows where Partner has a value
df.query("Partner.notna()")

# String equals
df.query("Name == 'Theseus'")

# Compare against a Python variable: prefix its name with @ inside the query string
val = 21
df.query("Age >= @val")

# If you only want certain fields from the results (e.g. just the names of those
# aged `val` or older), select the columns with [[...]] after the query.
# Only this last expression is rendered as the cell's output.
df.query("Age >= @val")[["Name"]]

Unnamed: 0,Name
1,Keagan
2,Jimmy


### Eval
Takes something like: df['New Column'] = df['Total intl minutes'] + df['Total night minutes']  
And simplifies to: df['New Column'] = df.eval('`Total intl minutes` + `Total night minutes`')

In [7]:
# Partner.isna() is a boolean mask; added to Age it counts as 1 where the
# partner is missing and 0 otherwise.
points = df.eval('Age + Partner.isna()')
df["Points"] = points
df

Unnamed: 0,Name,Age,School,Partner,Points
0,Theseus,20,Tufts,,21
1,Keagan,21,UofM,Darin,21
2,Jimmy,21,Tufts,Alyssa,21
3,Piplup,2,,,3


### Iter

### Series
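A DataFrame is basically a collection of label:Series pairs - each column is a Series keyed by its column name, and each row pulled out with `.iloc[i]` also comes back as a Series (named after that row's index label)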

In [34]:
# Each row comes back as a Series whose index is the column labels and whose
# name is the row's index label; print them one by one, blank lines between.
for _, row in df.iterrows():
    print(row, "\n\n")
df

Nome       Theseus
Age             20
School       Tufts
Partner       None
Name: 0, dtype: object 


Nome       Keagan
Age            21
School       UofM
Partner     Darin
Name: 1, dtype: object 


Nome        Jimmy
Age            21
School      Tufts
Partner    Alyssa
Name: 2, dtype: object 


Nome       Piplup
Age             2
School       None
Partner      None
Name: 3, dtype: object 




Unnamed: 0,Nome,Age,School,Partner
0,Theseus,20,Tufts,
1,Keagan,21,UofM,Darin
2,Jimmy,21,Tufts,Alyssa
3,Piplup,2,,


### Practice Using Lambda

In [17]:
# Display the name and school of anyone whose age is >= 20
# (not the last expression in the cell, so Jupyter does not render this result)
df.query('Age >= 20')[["Name", "School"]]

# items() yields (column name, column Series) pairs: one per COLUMN, not per row
for colName, values in df.items():
    pass

# Set Age to 0 wherever School is missing. A boolean mask with .loc updates all
# matching rows at once, works for any index, and isna() catches both None and
# NaN (an `== None` comparison never matches NaN).
df.loc[df['School'].isna(), 'Age'] = 0

# Series.apply hands the lambda one value from a single column at a time. To use
# several fields of a row, use df.apply(lambda row: ..., axis=1) instead.
# pd.notna() guards against NaN, which is truthy and would otherwise count as a partner.
df['hasPartner'] = df['Partner'].apply(lambda x: 1 if pd.notna(x) and x else 0)
df


Unnamed: 0,Name,Age,School,Partner,hasPartner
0,Theseus,20,Tufts,,0
1,Keagan,21,UofM,Darin,1
2,Jimmy,21,Tufts,Alyssa,1
3,Piplup,0,,,0


In [25]:
# Columns
for column in df.columns:
    col_obj = df[column]
    print(col_obj.values)

['Theseus' 'Keagan' 'Jimmy' 'Piplup']
[20 21 21  0]
['Tufts' 'UofM' 'Tufts' None]
[None 'Darin' 'Alyssa' None]
[0 1 1 0]


### Drop
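If you pass a DataFrame into a function as an argument (e.g. `def func1(df)`), whether the caller's DataFrame changes depends on how you drop: assigning the result of `drop` only affects a name inside the function, while `inplace=True` modifies the DataFrame the caller passed in.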


In [None]:
def func1(df):
    """Contrast a non-mutating drop with an in-place drop on a passed-in frame.

    Args:
        df: DataFrame that contains a "ColName" column.

    Returns:
        None. After the call, the caller's frame no longer has "ColName".

    Raises:
        KeyError: if "ColName" is not a column of ``df``.
    """
    # drop() without inplace returns a NEW frame; the object the caller passed
    # in is untouched. Assigning the result back to `df` would only rebind the
    # local name, and the in-place drop below would then act on that new frame
    # (and fail, since the column is already gone), so use a separate name.
    without_col = df.drop("ColName", axis=1)

    # inplace=True mutates the object itself, so the caller's frame loses the
    # column as well.
    df.drop("ColName", axis=1, inplace=True)