# Install Pandas

In [1]:
pip install pandas




In [2]:
import pandas as pd

## Ways to create a DataFrame

In [None]:
# 1. Create empty DataFrame

In [3]:
df = pd.DataFrame()

In [4]:
df

In [5]:
print(df)

Empty DataFrame
Columns: []
Index: []


In [None]:
# 2. Create DataFrame using List

In [6]:
# Flat list of strings; passed to pd.DataFrame, each element becomes one row
# of a single column.
py_list = [
    'Python',
    'ML',
    'Django',
    'Paandas',
]

In [7]:
df1 = pd.DataFrame(py_list)

In [8]:
df1

Unnamed: 0,0
0,Python
1,ML
2,Django
3,Paandas


In [9]:
print(df1)

         0
0   Python
1       ML
2   Django
3  Paandas


In [None]:
# 3. Create DataFrame using list of lists

In [10]:
# Nested list: each inner list is one row. The second row is shorter, so its
# last column is left empty when the DataFrame is built.
py_list1 = [
    ['Python', 'ML', 'Django'],
    ['SQL', 'Docker'],
]

In [11]:
df2 = pd.DataFrame(py_list1)

In [12]:
df2

Unnamed: 0,0,1,2
0,Python,ML,Django
1,SQL,Docker,


In [13]:
# 4. Create DataFrame using list of tuples

In [63]:
# One tuple per row: (date string, name, third field).
# NOTE(review): the third field mixes integers (100000, 98000) with a string
# ('DS'), while a later cell labels this column 'Role' -- confirm the intended
# data and re-run the notebook top to bottom.
tuple_list = [
    ('1/2/2021', 'John', 100000),
    ('3/1/2020', 'Harry', 'DS'),
    ('27/6/2019', 'Justin', 98000),
]

In [64]:
df3 = pd.DataFrame(tuple_list)

In [65]:
df3

Unnamed: 0,0,1,2
0,1/2/2021,John,100000
1,3/1/2020,Harry,DS
2,27/6/2019,Justin,98000


In [None]:
# Supply column names with the `columns` argument

In [18]:
# Build the frame from the tuples again, this time with explicit column labels
# instead of the default 0, 1, 2.
df4 = pd.DataFrame(
    tuple_list,
    columns=['Date of Joining', 'Name', 'Role'],
)

In [19]:
df4

Unnamed: 0,Date of Joining,Name,Role
0,1/2/2021,John,ML
1,3/1/2020,Harry,DS
2,27/6/2019,Justin,Data Analyst


In [None]:
# 5. Create DataFrame using list of dictionaries

In [50]:
# One dict per row; the keys become the columns. The first dict has no 'Sal'
# key, so that cell is left missing in the resulting DataFrame.
dict_list = [
    {'id': 101, 'Name': 'John'},
    {'id': 102, 'Name': 'Dassy', 'Sal': 80000},
    {'id': 103, 'Name': 'Marry', 'Sal': 94000},
]

In [51]:
df5 = pd.DataFrame(dict_list)

In [52]:
df5

Unnamed: 0,id,Name,Sal
0,101,John,
1,102,Dassy,80000.0
2,103,Marry,94000.0


In [None]:
# 6. Create DataFrame using dictionary of lists
# Each value must be list-like (a list or a tuple): plain scalars need an explicit index, and sets are rejected

In [29]:
# Every value here is a bare scalar (not wrapped in a list or tuple).
py_dict = {
    'id': 201,
    'Name': 'Daniel',
    'Sal': 98000,
}

In [30]:
# A dict of bare scalars gives pandas no row index to infer, so the constructor
# raises ValueError. The error is the point of this cell: catch it and print it
# so the notebook still runs top to bottom on a fresh kernel.
try:
    df6 = pd.DataFrame(py_dict)
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: If using all scalar values, you must pass an index

In [39]:
# Same record as before, but every value is wrapped in a set (curly braces).
py_dict2 = {
    'id': {201},
    'Name': {'Daniel'},
    'Sal': {98000},
}

In [40]:
# Sets have no defined order, so pandas refuses them as column data and raises
# TypeError. Catch and print the error so the demonstration does not abort a
# full run of the notebook.
try:
    df8 = pd.DataFrame(py_dict2)
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: Set type is unordered

In [None]:
# All arrays/lists must be the same length

In [41]:
# Deliberately unequal lengths: 'id' has two entries, the other columns one.
py_dict1 = {
    'id': [201, 202],
    'Name': ['Daniel'],
    'Sal': [98000],
}

In [42]:
# Columns of different lengths make the constructor raise ValueError. Show the
# error, then rebuild the frame from the rows that every column can supply, so
# that `df7` actually exists when the next cell displays it.
try:
    df7 = pd.DataFrame(py_dict1)
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")
    # Trim every column to the length of the shortest one.
    shortest = min(len(values) for values in py_dict1.values())
    df7 = pd.DataFrame({key: values[:shortest] for key, values in py_dict1.items()})

ValueError: arrays must all be same length

In [33]:
df7

Unnamed: 0,id,Name,Sal
0,201,Daniel,98000


In [None]:
# 7. Create DataFrame using a dictionary whose values are tuples

In [43]:
# Column-oriented data: each key is a column and each tuple holds that column's
# values, all of the same length (three rows).
tuple_dict = {
    'id': (301, 302, 303),
    'Name': ('Michu', 'Gill', 'Daniel'),
    'Sal': (98000, 76000, 82000),
}

In [44]:
df9 = pd.DataFrame(tuple_dict)

In [45]:
df9

Unnamed: 0,id,Name,Sal
0,301,Michu,98000
1,302,Gill,76000
2,303,Daniel,82000


## Read data from different sources

In [None]:
# 1. Create DataFrame by reading csv file

In [57]:
# Load the CSV given by a relative path into a DataFrame.
# NOTE(review): the displayed values keep their single quotes (e.g. 'Kat'), so
# the file presumably quotes strings with ' -- passing quotechar="'" may be what
# is wanted; confirm against the file.
df10 = pd.read_csv('sample_csv.csv')

In [58]:
df10

Unnamed: 0,id,name,sal,role
0,101,'Kat',90000.0,'ML'
1,102,'Kevin',87000.0,'DS'
2,103,'Justin',96000.0,'Python Developer'
3,104,'Harry',,'Data Analyst'
4,105,'',91000.0,


In [None]:
# 2. Create DataFrame by reading JSON file

In [66]:
# The JSON file holds a dict whose value is a list of dicts, so each inner dict
# ends up as a single cell of the one resulting column.
# Use a path relative to the notebook (like the other sample files) rather than
# a machine-specific absolute path.
df11 = pd.read_json('sample.json')

In [67]:
df11

Unnamed: 0,emp_details
0,"{'name': 'Joy', 'Department': 'IT', 'Location'..."
1,"{'name': 'Harry', 'Department': 'HR', 'Locatio..."
2,"{'name': 'Danial', 'Department': 'Finance', 'L..."


In [69]:
# The JSON file contains a list of dicts: each dict becomes one row and its
# keys become the columns.
df12 = pd.read_json('sample_json.json')

In [70]:
df12

Unnamed: 0,name,Department,Location,Status
0,Joy,IT,Mumbai,False
1,Harry,HR,Pune,False
2,Danial,Finance,Chennai,True


In [None]:
# 3. Create DataFrame by reading an Excel file
#    - By default the first sheet is read
#    - Use sheet_name to read a particular sheet

In [71]:
df13 = pd.read_excel('sample_excel.xlsx')

In [72]:
df13

Unnamed: 0,id,name,sal,role
0,101,Kal,90000,ML
1,102,Kevin,87000,DS
2,103,John,96000,Python Dev
3,104,Harry,54000,Data Analyst
4,105,Justin,91000,ML


In [74]:
df14 = pd.read_excel('sample_excel.xlsx', sheet_name='Sheet2')

In [75]:
df14

Unnamed: 0,id,name,sal,role
0,101,Kal,90000,ML
1,102,Kevin,87000,DS
2,103,John,96000,Python Dev
3,104,Harry,54000,Data Analyst
4,105,Justin,91000,ML
5,101,Kal,90000,ML
6,102,Kevin,87000,DS
7,103,John,96000,Python Dev


In [None]:
# 4. Create DataFrame from HTML tables on a website

In [77]:
# read_html returns a list of DataFrames, one per table found on the page;
# index into the list to pick a single table.
df15 = pd.read_html('https://en.wikipedia.org/wiki/List_of_Bollywood_films_of_2021')

In [79]:
df15[2]

Unnamed: 0,Rank,Title,Production Company,Distributor,Worldwide Gross,Ref.
0,1,Sooryavanshi,Reliance EntertainmentRohit Shetty PicturezDha...,Reliance EntertainmentPVR Pictures,₹294.17 crore (US$39 million),[1][2]
1,2,83*,Reliance EntertainmentPhantom FilmsVibri Media...,Reliance EntertainmentPVR Pictures,₹192.19 crore (US$26 million),[3][2]
2,3,Antim: The Final Truth,Salman Khan Films,Zee Studios,₹58.37 crore (US$7.8 million),[4][2]
3,4,Bell Bottom,Pooja EntertainmentEmmay Entertainment,Pen Marudhar Entertainment,₹50.58 crore (US$6.7 million),[5][2]
4,5,Chandigarh Kare Aashiqui,T-SeriesGuy in the Sky Pictures,AA Films,₹38.57 crore (US$5.1 million),[6][2]
5,6,Tadap,Fox Star StudiosNadiadwala Grandson Entertainment,Fox Star Studios,₹34.86 crore (US$4.6 million),[7][2]
6,7,Roohi,Maddock FilmsJio Studios,Jio Studios,₹30.33 crore (US$4.0 million),[8][2]
7,8,Mumbai Saga,T-SeriesWhite Feather Films,AA Films,₹22.29 crore (US$3.0 million),[9][2]
8,9,Bunty Aur Babli 2,Yash Raj Films,Yash Raj Films,₹22.12 crore (US$2.9 million),[10][2]
9,10,Radhe,Zee StudiosSalman Khan FilmsSohail Khan Produc...,Zee StudiosZee PlexZEE5,₹18.33 crore (US$2.4 million),[a][11][2]


In [80]:
df15[2].shape

(10, 6)

In [None]:
# 5. Create DataFrame by reading JSON from a URL

In [81]:
df16 = pd.read_json('https://api.sampleapis.com/baseball/hitsSingleSeason')

In [82]:
df16

Unnamed: 0,Rank,Player,AgeThatYear,Hits,Year,Bats,id
0,1,Ichiro Suzuki,30,262,2004,L,1
1,2,George Sisler,27,257,1920,L,2
2,3,Lefty O'Doul,32,254,1929,L,3
3,,Bill Terry,31,254,1930,L,4
4,5,Al Simmons,23,253,1925,R,5
...,...,...,...,...,...,...,...
529,,Paul Waner,26,200,1929,L,530
530,,Billy Werber,26,200,1934,R,531
531,,Bill White,29,200,1963,L,532
532,,Pinky Whitney,24,200,1929,R,533
