<a href="https://colab.research.google.com/github/sakshirikhe0192/ML/blob/master/Ways_of_Creating_DataFrame.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

pandas is a fast, powerful, flexible, and easy-to-use open-source data analysis and manipulation tool, built on top of the Python programming language. Pandas tends to perform better when the number of rows is 500K or more.

***Ways to create a DataFrame***

In [None]:
#1 creating an empty dataframe and filling values

import pandas as pd
import numpy as np

# Start from a frame that has column labels but no rows yet.
df=pd.DataFrame(columns=['A','B','C'])
df['A']=[1,2,3] # column-wise assignment: creates rows 0..2; B and C hold no values yet
df.loc[0]=[4,2,3] # row-wise assignment: overwrites every column of the row labelled 0
df.loc[1]=[4,2,3]
df.loc[2]=[4,2,3]
df.loc[0:1,'B']=3 # .loc slices are label-based and include both ends (rows 0 and 1)
df=df.fillna(2) # fillna returns a new frame, so the result must be assigned to take effect
df.loc[1,'C']=6
df.loc[1:2,'B']=[0,0]
# Columns created empty start out as object dtype; print the dtypes to see what each column ended up as.
print(df.dtypes)

In [None]:
#2 creating using simple numpy array

# 3x3 integer matrix holding 1..9 in row-major order.
a=np.arange(1,10).reshape(3,3)
# Wrap the matrix, attaching column labels and string row labels.
df=pd.DataFrame(data=a,index=list('abc'),columns=list('ABC'))

In [None]:
#3 creating using dictionary

# Each key is a column label; each list supplies that column's values top to bottom.
d={
    'A':[1,2,3],
    'B':[4,5,6],
    'C':[7,8,9],
}
# index supplies the row labels for the three rows.
df=pd.DataFrame(data=d,index=list('abc'))

In [None]:
#4 list of dictionary

# One dict per row; the keys become columns.
# The second row has no 'C' key, so that cell is filled with NaN.
ld=[
    {'A':1,'B':1,'C':1},
    {'A':2,'B':2},
]

df=pd.DataFrame(data=ld)

In [None]:
#5 creating using from_dict dictionary

d={
    'A':[1,2,3],
    'B':[4,5,6],
    'C':[7,8,9],
}
# orient='index' turns each key into a row label (not a column),
# so the column labels have to be supplied separately.
df=pd.DataFrame.from_dict(data=d,orient='index',columns=['a','b','c'])
df

In [None]:
#6 using read_csv file

# read_csv parses the file into a DataFrame; the DataFrame constructor itself
# does not accept a file path.
df=pd.read_csv('data.csv')
df

In [None]:
#7 using StringIO for csv file

from io import StringIO

# StringIO wraps the text in a file-like object, so read_csv can parse it
# exactly like a file on disk (first line = header, remaining lines = rows).
s=StringIO('A,B,C\n1,2,3\n4,5,6\n7,8,9')
df=pd.read_csv(s)

In [None]:
#8 using read_json file

# lines=True: the file is read as one JSON object per line.
json_path='data.json'
df=pd.read_json(json_path,lines=True)
df

In [None]:
#9 using StringIO for JSON file

# Three JSON objects, one per line, held in an in-memory text buffer.
s=StringIO(
    '{"A":1,"B":2,"C":3}\n'
    '{"A":1,"B":2,"C":3}\n'
    '{"A":1,"B":2,"C":3}'
)
# lines=True makes each line of the buffer one row of the frame.
df=pd.read_json(path_or_buf=s,lines=True)
df

In [None]:
#10 reading html data

from io import StringIO

import requests
from bs4 import BeautifulSoup
url='http://www.nationmaster.com/country-info/stats/Media/Internet-users'

# Fetch the page once and reuse the response for both approaches below.
res = requests.get(url, timeout=30)
res.raise_for_status() # stop on an HTTP error instead of trying to parse an error page

# Approach 1: isolate a single <table> with BeautifulSoup, then let pandas parse it.
soup = BeautifulSoup(res.content,'lxml') # soup is the page in parsed form
table = soup.find_all('table')[0] # find_all returns a list; keep only the first table
# read_html takes a file-like object (passing literal HTML text is deprecated) and
# returns a list of DataFrames, so [0] picks the single parsed table.
df = pd.read_html(StringIO(str(table)))[0]

#OR

# Approach 2: hand pandas the whole page; source is a list with one DataFrame per table found.
source = pd.read_html(StringIO(res.text))

# Display the frame from approach 1 (only the last expression of a cell is rendered).
df



In [None]:
#11 from copy of other dataframe

df_copy = df.copy()   # independent copy: later changes to df_copy do not touch df
df_alias = df         # plain assignment only adds a second name for the same object

In [None]:
#12 vertical concatenation of dataframe (rows of df2 stacked under df1)

a1=np.array([[1,2,3],[4,5,6],[7,8,9]])
a2=np.array([[0,9,8],[7,6,5],[4,3,2]])

df1=pd.DataFrame(a1,columns=['A','B','C'])
df2=pd.DataFrame(a2,columns=['A','B','C'])

# axis=0 appends rows; reset_index(drop=True) renumbers them 0..5 instead of repeating 0..2.
df3=pd.concat([df1,df2],axis=0).reset_index(drop=True)

#13 horizontal concatenation of Dataframe (columns of df2 placed beside df1)

# axis=1 aligns on the row index and puts the columns side by side,
# so the labels A, B, C appear twice in the result.
df4=pd.concat([df1,df2],axis=1)

In [None]:
#14 combining the columns of two frames with merge (join on key columns)

d1={
    'A':[1,2,3],
    'B':[3,5,6],
    'C':[7,8,9],
}
d2={
    'B':[3,5],
    'D':[7,8],
}

df1=pd.DataFrame(data=d1)
df2=pd.DataFrame(data=d2)

# Join on the single key column B; how may be 'right', 'left' or 'inner'.
df=df1.merge(df2,on=['B'],how='right')

#OR

d1={
    'A':[1,2,3],
    'B':[3,5,6],
    'C':[7,8,9],
}
d2={
    'A':[1,5],
    'B':[3,5],
    'D':[7,8],
}

df1=pd.DataFrame(data=d1)
df2=pd.DataFrame(data=d2)

# Join on two key columns: a row is kept only when both A and B match.
df=df1.merge(df2,on=['A','B'],how='inner')
df

In [None]:
#15 as transpose

d1={'A':[1,2,3],'B':[3,5,6],'C':[7,8,9]}
df1=pd.DataFrame(d1)

# Transpose the frame built in this cell: column labels become row labels and vice versa.
df=df1.T
df


In [None]:
#16 creating Dataframe using random int

# 5x5 grid of random integers drawn from 0..9 (the upper bound 10 is exclusive).
random_values = np.random.randint(0, 10, (5, 5))
df = pd.DataFrame(data=random_values, columns=list('abcde'))