In [1]:
person = {
    "first": "Corey", 
    "last": "Schafer", 
    "email": "CoreyMSchafer@gmail.com"
}

In [2]:
people = {
    "first": ["Mark", 'Jane', 'John'], 
    "last": ["Twain", 'Doe', 'Doe'], 
    "email": ["marktwain@mailmail.com", 'JaneDoe@email.com', 'JohnDoe@email.com']
}

In [3]:
import pandas as pd

In [4]:
df = pd.DataFrame(people)

In [5]:
df

Unnamed: 0,first,last,email
0,Mark,Twain,marktwain@mailmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [6]:
df['email']

0    marktwain@mailmail.com
1         JaneDoe@email.com
2         JohnDoe@email.com
Name: email, dtype: object

In [7]:
type(df['email']) #series - row of data (1-d data)

pandas.core.series.Series

In [8]:
df.email #When using this way, column name shouldn't be the same as a method name of dataframe

0    marktwain@mailmail.com
1         JaneDoe@email.com
2         JohnDoe@email.com
Name: email, dtype: object

In [9]:
df[['last','email']] #nested list(2-d)

Unnamed: 0,last,email
0,Twain,marktwain@mailmail.com
1,Doe,JaneDoe@email.com
2,Doe,JohnDoe@email.com


In [10]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [11]:
df.iloc[0] #integer location - return series which contain the values of the row

first                      Mark
last                      Twain
email    marktwain@mailmail.com
Name: 0, dtype: object

In [12]:
df.iloc[[0,1]]

Unnamed: 0,first,last,email
0,Mark,Twain,marktwain@mailmail.com
1,Jane,Doe,JaneDoe@email.com


In [13]:
df.iloc[[0,1],[2]] #column with index 2 from the rows (here - email)

Unnamed: 0,email
0,marktwain@mailmail.com
1,JaneDoe@email.com


In [14]:
df

Unnamed: 0,first,last,email
0,Mark,Twain,marktwain@mailmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [15]:
df.loc[0]

first                      Mark
last                      Twain
email    marktwain@mailmail.com
Name: 0, dtype: object

In [16]:
df.loc[[0,2],'email']

0    marktwain@mailmail.com
2         JohnDoe@email.com
Name: email, dtype: object

In [17]:
df.loc[[0,2],['email','first']]

Unnamed: 0,email,first
0,marktwain@mailmail.com,Mark
2,JohnDoe@email.com,John


In [18]:
df

Unnamed: 0,first,last,email
0,Mark,Twain,marktwain@mailmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


### Index

In [19]:
# Index - unique value (like primary key)
df.set_index('email',inplace = True) #add inplace = True attribute to make changes in the actual dataframe

In [20]:
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
marktwain@mailmail.com,Mark,Twain
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [21]:
df.index

Index(['marktwain@mailmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com'], dtype='object', name='email')

In [22]:
df.loc['marktwain@mailmail.com','first':'last'] #123 won't work anymore, but iloc still works

first     Mark
last     Twain
Name: marktwain@mailmail.com, dtype: object

In [23]:
df.reset_index(inplace = True) #reset index
df

Unnamed: 0,email,first,last
0,marktwain@mailmail.com,Mark,Twain
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [24]:
df

Unnamed: 0,email,first,last
0,marktwain@mailmail.com,Mark,Twain
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [25]:
df

Unnamed: 0,email,first,last
0,marktwain@mailmail.com,Mark,Twain
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [26]:
filt = (df['last'] == 'Doe') #Define a filter

In [27]:
df[filt] #Apply filter to dataframe
df[df['last'] == 'Doe']
df.loc[filt] #Preferred

Unnamed: 0,email,first,last
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [30]:
filt2 = (df['last'] == 'Doe' ) & (df['first'] == 'John')
filt3 = (df['last'] != 'Doe' ) | (df['first'] == 'Mark')

In [31]:
df.loc[filt2,'first':'last']
df.loc[filt3]

Unnamed: 0,email,first,last
0,marktwain@mailmail.com,Mark,Twain


In [32]:
df.columns

Index(['email', 'first', 'last'], dtype='object')

In [34]:
df.columns = ['first_name','last_name','email']

In [37]:
df.columns = [x.upper() for x in df.columns]
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,marktwain@mailmail.com,Mark,Twain
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [38]:
df.columns = [x.lower() for x in df.columns]
df.columns = df.columns.str.replace(' ','_') #Replace space in column names with spaces

In [41]:
df.rename(columns = {'first_name':'first','last_name':'last'},inplace = True)

In [45]:
df.columns = ['email','first','last']
df

Unnamed: 0,email,first,last
0,marktwain@mailmail.com,Mark,Twain
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [51]:
df.loc[2] = ['JohnDoe@email.com','John','Smith']
df.loc[2,'last'] = 'Smithian'
df

Unnamed: 0,email,first,last
0,marktwain@mailmail.com,Mark,Twain
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Smithian
