In [2]:
import pandas as pd
import numpy as np



In [4]:
# Build a 3x3 demo frame holding the values 1..9, filled row by row.
# The row index mixes integer and string labels (2, 'A', 4) and the columns
# are integer labels, so label-based (.loc) and position-based (.iloc)
# lookups give different rows for the same number.
df = pd.DataFrame(
    np.arange(1, 10).reshape(3, 3),
    index=[2, 'A', 4],
    columns=[48, 49, 50],
)

In [5]:
df

Unnamed: 0,48,49,50
2,1,2,3
A,4,5,6
4,7,8,9


In [7]:
df.shape

(3, 3)

In [8]:
# .loc is label-based: fetch the row whose index label is 2
# (that label belongs to the first row of this frame).
print(df.loc[2, :])

48    1
49    2
50    3
Name: 2, dtype: int32


In [9]:
# .iloc is position-based: -1 addresses the last row regardless of how many
# rows the frame has (a hard-coded 2 is only "last" for a 3-row frame).
print(df.iloc[-1])

48    7
49    8
50    9
Name: 4, dtype: int32


In [None]:
# Change row values: overwrite the row labelled 2 with new values.
# NOTE(review): this cell has no execution count, and the frames displayed in
# the following cells still show 1, 2, 3 for this row, i.e. the recorded
# outputs were produced without running it. Executing it on a fresh run would
# change the later outputs that include this row.
df.loc[2] = [11,12,13]

In [10]:
df

Unnamed: 0,48,49,50
2,1,2,3
A,4,5,6
4,7,8,9


In [11]:
# Add a new column to df through .loc: select all rows (:) and assign to a
# column label that does not exist yet, supplying one value per row.
df.loc[:,'Grade'] = ['A','B', 'C']

In [12]:
df


Unnamed: 0,48,49,50,Grade
2,1,2,3,A
A,4,5,6,B
4,7,8,9,C


In [13]:
# Add a new column 'year' with plain bracket assignment, one value per row.
df['year'] = [2018, 2017, 2016]

In [14]:
df


Unnamed: 0,48,49,50,Grade,year
2,1,2,3,A,2018
A,4,5,6,B,2017
4,7,8,9,C,2016


In [15]:
# Broadcasting: assigning a scalar fills every row of the column with it,
# here replacing the 'year' values with missing values.
# `np.nan` is the canonical spelling; the `np.NaN` alias was removed in
# NumPy 2.0 and raises AttributeError there.
df['year'] = np.nan

In [17]:
# Print the name of the index; it is None here because the index was created
# from a plain list of labels and no name was ever assigned to it.
print(df.index.name)


None


In [18]:
# Add an 'id' column through .loc; the scalar is broadcast to every row.
df.loc[:, 'id']= 10001

In [19]:
df

Unnamed: 0,48,49,50,Grade,year,id
2,1,2,3,A,,10001
A,4,5,6,B,,10001
4,7,8,9,C,,10001


In [53]:
# Create a new frame in which the old row labels become an ordinary column
# (named "index") and the rows receive a fresh 0..n-1 index.
# `level` only chooses which index level(s) to reset (relevant for a
# MultiIndex; it does not mean rows vs. columns), so it is omitted for this
# single-level index.
# drop=False keeps the old labels as a column instead of discarding them.
df_reset = df.reset_index(drop=False)

In [54]:
df_reset

Unnamed: 0,index,48,49,50,Grade,year,id
0,2,1,2,3,A,,10001
1,A,4,5,6,B,,10001
2,4,7,8,9,C,,10001


In [55]:
# Rename the "index" column (the former row labels) to "new_index".
# rename() returns a new frame, so the result is bound back to df_reset.
df_reset = df_reset.rename(columns={"index": "new_index"})

In [56]:
df_reset

Unnamed: 0,new_index,48,49,50,Grade,year,id
0,2,1,2,3,A,,10001
1,A,4,5,6,B,,10001
2,4,7,8,9,C,,10001


In [57]:
df_reset.shape

(3, 7)

In [58]:
# drop() returns a new frame; because the result is neither assigned nor
# requested in place, df_reset itself stays unchanged. This cell only previews
# the frame with the second row (position 1) removed - so be careful.
df_reset.drop(index=df_reset.index[1])

Unnamed: 0,new_index,48,49,50,Grade,year,id
0,2,1,2,3,A,,10001
2,4,7,8,9,C,,10001


In [59]:
# Delete the second row for real: look up its label by position through
# df_reset.index[1] and pass inplace=True so df_reset itself is modified.
# NOTE: not idempotent - each re-run of this cell removes one more row.
df_reset.drop(index=df_reset.index[1], inplace=True)

In [60]:
df_reset

Unnamed: 0,new_index,48,49,50,Grade,year,id
0,2,1,2,3,A,,10001
2,4,7,8,9,C,,10001


In [61]:
# Renumber the rows 0..n-1 so the index is contiguous again after the deletion.
# drop defaults to False, so the previous row labels are kept as an extra
# "index" column.
# The result is bound back to the name rather than using inplace=True, which
# offers no benefit and prevents method chaining.
df_reset = df_reset.reset_index()

In [62]:
df_reset

Unnamed: 0,index,new_index,48,49,50,Grade,year,id
0,0,2,1,2,3,A,,10001
1,2,4,7,8,9,C,,10001


In [63]:
# Drop multiple columns in one go: both leftover index columns are removed.
# `columns=` names the axis explicitly, and the result is rebound instead of
# mutating the frame with inplace=True.
df_reset = df_reset.drop(columns=['index', 'new_index'])


In [64]:
df_reset

Unnamed: 0,48,49,50,Grade,year,id
0,1,2,3,A,,10001
1,7,8,9,C,,10001


In [65]:
# Change column names: map the integer column labels to descriptive names.
newcols = {
    48: 'subject_1',
    49: 'subject_2',
    50: 'subject_3',
}
# rename() returns a new frame; rebinding it avoids inplace=True.
df_reset = df_reset.rename(columns=newcols)

In [66]:
df_reset

Unnamed: 0,subject_1,subject_2,subject_3,Grade,year,id
0,1,2,3,A,,10001
1,7,8,9,C,,10001


In [67]:
# Replace selected cell values with text labels across the whole frame.
# The result is only displayed: replace() returns a new frame and it is not
# assigned here, so df_reset keeps its original numeric values.
df_reset.replace({
    6: 'aweful',
    7: 'ok',
    8: 'acceptable',
    9: 'fair',
    11: 'better',
    12: 'good',
    13: 'perfect',
})

Unnamed: 0,subject_1,subject_2,subject_3,Grade,year,id
0,1,2,3,A,,10001
1,ok,acceptable,fair,C,,10001


In [68]:
# Add a new 'score' column; the list supplies one value for each of the two
# rows left in df_reset.
df_reset['score'] = [85, 90]

In [69]:
df_reset

Unnamed: 0,subject_1,subject_2,subject_3,Grade,year,id,score
0,1,2,3,A,,10001,85
1,7,8,9,C,,10001,90


In [70]:
# create a function
def doubler(x):
    """Return ``x * 2``.

    Works for any value that supports ``*`` with an int: numbers are doubled,
    while a string is repeated twice (e.g. ``'C'`` -> ``'CC'``).
    """
    return x * 2

In [71]:
# Create the 'Score Doubled' column by running doubler on each individual
# value of the 'score' column.
df_reset['Score Doubled'] = df_reset['score'].map(doubler)

In [72]:
df_reset

Unnamed: 0,subject_1,subject_2,subject_3,Grade,year,id,score,Score Doubled
0,1,2,3,A,,10001,85,170
1,7,8,9,C,,10001,90,180


In [104]:
# Create a new frame without the 'year' column; df_reset is left untouched
# because drop() returns a new frame.
df_onlyNums = df_reset.drop(columns='year')

In [105]:
df_onlyNums

Unnamed: 0,subject_1,subject_2,subject_3,Grade,id,score,Score Doubled
0,1,2,3,A,10001,85,170
1,7,8,9,C,10001,90,180


In [106]:
# Append a row labelled 2 built by applying doubler to every field of row 1.
# Note that all columns are doubled, including the text Grade ('C' -> 'CC')
# and the id.
doubled_row = df_onlyNums.loc[1].apply(doubler)
df_onlyNums.loc[2] = doubled_row

In [107]:
df_onlyNums

Unnamed: 0,subject_1,subject_2,subject_3,Grade,id,score,Score Doubled
0,1,2,3,A,10001,85,170
1,7,8,9,C,10001,90,180
2,14,16,18,CC,20002,180,360


In [108]:
# Remove the text 'Grade' column so only numeric columns remain.
# The result is rebound instead of mutating the frame with inplace=True.
df_onlyNums = df_onlyNums.drop(columns='Grade')

In [109]:
df_onlyNums

Unnamed: 0,subject_1,subject_2,subject_3,id,score,Score Doubled
0,1,2,3,10001,85,170
1,7,8,9,10001,90,180
2,14,16,18,20002,180,360
