# Basic Pandas

## Create a new dataframe

In [1]:
import pandas as pd

In [2]:
# create dataframe from scratch
raw_data = {
    "name": ["Mon", "Joe", "Mary", "John", "Anna"],
    "age": [21, 25, 20, 22, 31],
    "gender": ["M", "M", "F", "M", "F"]
}

df = pd.DataFrame(raw_data)

df

Unnamed: 0,name,age,gender
0,Mon,21,M
1,Joe,25,M
2,Mary,20,F
3,John,22,M
4,Anna,31,F


## Add a new column

In [3]:
# add new column
df['city'] = ['London', 'London', 'London', 'Manchester', 'Liverpool']

In [4]:
df

Unnamed: 0,name,age,gender,city
0,Mon,21,M,London
1,Joe,25,M,London
2,Mary,20,F,London
3,John,22,M,Manchester
4,Anna,31,F,Liverpool


In [5]:
df.shape

(5, 4)

## Drop a column

In [6]:
# remove index = 2
df = df.drop(2, axis=0)

In [7]:
df

Unnamed: 0,name,age,gender,city
0,Mon,21,M,London
1,Joe,25,M,London
3,John,22,M,Manchester
4,Anna,31,F,Liverpool


In [8]:
# drop column city
df = df.drop('city', axis=1)

In [9]:
df

Unnamed: 0,name,age,gender
0,Mon,21,M
1,Joe,25,M
3,John,22,M
4,Anna,31,F


In [10]:
# reset index
df = df.reset_index(drop = True)
df

Unnamed: 0,name,age,gender
0,Mon,21,M
1,Joe,25,M
2,John,22,M
3,Anna,31,F


## Rename columns

In [11]:
df.columns

Index(['name', 'age', 'gender'], dtype='object')

In [12]:
# column names
list(df.columns)

['name', 'age', 'gender']

In [13]:
# rename columns
df.columns = ['nickname', 'age', 'sex']
df

Unnamed: 0,nickname,age,sex
0,Mon,21,M
1,Joe,25,M
2,John,22,M
3,Anna,31,F


## Series vs. DataFrame

In [14]:
type(df['nickname'])

pandas.core.series.Series

In [15]:
type(df)

pandas.core.frame.DataFrame

In [16]:
# create a new series
s1 = pd.Series(['Mary', 20, 'F'], index=['nickname', 'age', 'sex'])
print(s1)
print(type(s1))

nickname    Mary
age           20
sex            F
dtype: object
<class 'pandas.core.series.Series'>


In [17]:
# append s1 to df
df = df.append(s1, ignore_index=True)
df

  df = df.append(s1, ignore_index=True)


Unnamed: 0,nickname,age,sex
0,Mon,21,M
1,Joe,25,M
2,John,22,M
3,Anna,31,F
4,Mary,20,F


In [18]:
# create a new column using s2
s2 = pd.Series(['London', 'London', 'London', 'Manchester', 'Liverpool'])
df['city'] = s2
df

Unnamed: 0,nickname,age,sex,city
0,Mon,21,M,London
1,Joe,25,M,London
2,John,22,M,London
3,Anna,31,F,Manchester
4,Mary,20,F,Liverpool


## Write csv file

In [19]:
# write csv file
df.to_csv('mydata.csv')

## Import csv file

In [20]:
# import csv file
df2 = pd.read_csv('Data/data.csv')
df2

Unnamed: 0,id,name,city
0,1,John,London
1,2,Joe,Liverpool
2,3,Mary,Manchester
3,4,Anna,Swansea
4,5,David,London


## Import Excel

In [21]:
# import excel file
import pandas as pd
df3 = pd.read_excel('Data/data.xlsx')

In [22]:
df3

Unnamed: 0,id,name,city
0,1,John,London
1,2,Joe,Liverpool
2,3,Mary,Manchester
3,4,Anna,Swansea
4,5,David,London


## Import JSON file

In [23]:
# import json
df4 = pd.read_json('Data/data.json')
df4

Unnamed: 0,ebook,language,amazonRating,myFavorite
0,Getting started with Python,python,4.89,True
1,Introduction to R,r,4.88,False
2,SQL for Beginners,sql,4.75,True


In [24]:
df4['myFavorite'].dtype

dtype('bool')