# Create a pandas DataFrame in different ways
# From List

In [1]:
import pandas as pd

# A flat list of values becomes a one-column DataFrame; `columns` supplies
# the label for that single column.
data = ['Saira', 'Mitu', 'Asri']

df = pd.DataFrame(data=data, columns=['name'])

df

Unnamed: 0,name
0,Saira
1,Mitu
2,Asri


# From List of lists

In [2]:
# Each inner list is one row; `heading` names the columns.
# The last row has no CGPA entry, so that cell is missing in the result.
heading = ['Subject', 'Number', 'CGPA']
data = [
    ['Cse312', 30, 2.5],
    ['Cse312', 40, 3.6],
    ['Cse313', 50],
]

df = pd.DataFrame(data=data, columns=heading)

df

Unnamed: 0,Subject,Number,CGPA
0,Cse312,30,2.5
1,Cse312,40,3.6
2,Cse313,50,


# From dict of ndarrays/lists

In [3]:
# Each dict key becomes a column label and its list supplies that column's values.
data = {
    'Name': ['Saira', 'Mitu', 'Asri'],
    'Age': [20, 30, 10],
}

df = pd.DataFrame(data=data)

df

Unnamed: 0,Name,Age
0,Saira,20
1,Mitu,30
2,Asri,10


# DataFrame with a custom index

In [4]:
# `index` supplies custom row labels in place of the default 0, 1, 2.
data = {
    'Name': ['Saira', 'Mitu', 'Asri'],
    'Age': [20, 30, 10],
}

df = pd.DataFrame(data=data, index=['Rank1', 'Rank2', 'Rank3'])

df

Unnamed: 0,Name,Age
Rank1,Saira,20
Rank2,Mitu,30
Rank3,Asri,10


# From list of dicts

In [5]:
# Each dict is one row and the columns are the union of all keys.
# A key absent from a row (here 'a' in the first dict) becomes a missing value.
data = [
    {'b': 2, 'c': 3},
    {'a': 10, 'b': 20, 'c': 30},
]

df = pd.DataFrame(data=data)

df

Unnamed: 0,b,c,a
0,2,3,
1,20,30,10.0


# With both a row index and column labels

In [6]:
# `index` labels the rows and `columns` chooses which keys to keep.
# 'c1' is not a key in any dict, so that column holds only missing values,
# and the 'c' values are left out because 'c' is not requested.
data = [
    {'a': 1, 'b': 2},
    {'a': 5, 'b': 10, 'c': 20},
]

df1 = pd.DataFrame(
    data=data,
    index=['first', 'second'],
    columns=['a', 'b', 'c1'],
)

df1

Unnamed: 0,a,b,c1
first,1,2,
second,5,10,


# From zip() function

In [7]:
# zip() pairs the two lists element by element; each resulting tuple is one
# row of the DataFrame.
List1 = ['Apple', 'Orange', 'Papaya']
List2 = ['Vanila', 'Creamy', 'Choco']

list_of_tuples = list(zip(List1, List2))

df = pd.DataFrame(data=list_of_tuples, columns=['Fruit', 'Ice Cream'])

df

Unnamed: 0,Fruit,Ice Cream
0,Apple,Vanila
1,Orange,Creamy
2,Papaya,Choco


# From dict of Series

In [8]:
# Each Series is one column. Passing `index` picks rows by label in the order
# given, so the frame comes out reordered (label 1 first, then 0, 4, ...).
# 'Rating' holds only 11 values, so label 11 has no rating and shows as NaN.
data = {
    'Name': pd.Series([
        'Tom', 'James', 'Ricky', 'Vin', 'Steve', 'Smith',
        'Jack', 'Lee', 'David', 'Gasper', 'Betina', 'Peter',
    ]),
    'Age': pd.Series([25, 26, 25, 23, 30, 29, 23, 34, 40, 30, 51, 23]),
    'Rating': pd.Series([
        4.23, 3.24, 3.98, 2.56, 3.20, 4.6,
        3.8, 3.78, 2.98, 4.80, 4.10,
    ]),
}

df = pd.DataFrame(data=data, index=[1, 0, 4, 3, 6, 2, 5, 9, 7, 10, 8, 11])

df

Unnamed: 0,Name,Age,Rating
1,James,26,3.24
0,Tom,25,4.23
4,Steve,30,3.2
3,Vin,23,2.56
6,Jack,23,3.8
2,Ricky,25,3.98
5,Smith,29,4.6
9,Gasper,30,4.8
7,Lee,34,3.78
10,Betina,51,4.1


# DataFrame to CSV

In [9]:
# Write the frame to new_data.csv; index=False leaves the row labels out of the file.
df.to_csv("new_data.csv", index=False)

# Mean, Median, Mode

In [10]:
# Arithmetic mean of the 'Age' column
df['Age'].mean()

29.916666666666668

In [11]:
# Median (middle value) of the 'Age' column
df['Age'].median()

27.5

In [12]:
# Most frequent value in the 'Age' column; the result is a Series rather than a scalar
df['Age'].mode()

0    23
dtype: int64

# Column Selection

In [13]:
# Select a single column as a Series.
# Only the last expression of a cell is displayed, so one selection is kept.
# Bracket access works for any column label. Attribute access (df.Age) returns
# the same column, but only when the label is a valid Python identifier that
# does not clash with an existing DataFrame attribute or method.
df['Age']

1     26
0     25
4     30
3     23
6     23
2     25
5     29
9     30
7     34
10    51
8     40
11    23
Name: Age, dtype: int64

In [14]:
# A list of labels selects several columns at once and returns a DataFrame;
# head(4) limits the display to the first four rows.
df[['Name', 'Age']].head(4)

Unnamed: 0,Name,Age
1,James,26
0,Tom,25
4,Steve,30
3,Vin,23


In [15]:
# First column (position 0) across all rows; show only the first four entries.
df.iloc[:, 0].head(4)

1    James
0      Tom
4    Steve
3      Vin
Name: Name, dtype: object

In [16]:
# Last column (position -1) across all rows; show only the first four entries.
df.iloc[:, -1].head(4)

1    3.24
0    4.23
4    3.20
3    2.56
Name: Rating, dtype: float64

In [17]:
# First two columns (positions 0 and 1) across all rows; show only the first four rows.
df.iloc[:, :2].head(4)

Unnamed: 0,Name,Age
1,James,26
0,Tom,25
4,Steve,30
3,Vin,23


# Row Selection

In [18]:
# Row selection: slicing the frame with [] works on row positions, so 2:3
# returns just the third row (the one labelled 4 here).
df[2:3]

Unnamed: 0,Name,Age,Rating
4,Steve,30,3.2


In [19]:
# Use of iloc: selection by integer position, not by index label

# Single selections using iloc

# Rows:
df.iloc[2] # third row of the data frame (position 2), returned as a Series

Name      Steve
Age          30
Rating      3.2
Name: 4, dtype: object

In [20]:
df.iloc[-1] # last row of the data frame (negative positions count from the end)

Name      Peter
Age          23
Rating      NaN
Name: 11, dtype: object

In [21]:
# First four rows of the data frame (positions 0 through 3).
df.iloc[:4]

Unnamed: 0,Name,Age,Rating
1,James,26,3.24
0,Tom,25,4.23
4,Steve,30,3.2
3,Vin,23,2.56


# Row and Column Selection

In [22]:
# Rows at positions 0, 3 and 6 (1st, 4th, 7th) combined with the 1st and 2nd columns.
df.iloc[[0, 3, 6], [0, 1]]

Unnamed: 0,Name,Age
1,James,26
3,Vin,23
5,Smith,29


In [23]:
# First four rows (positions 0-3) with the 2nd and 3rd columns (positions 1-2).
df.iloc[:4, 1:3]

Unnamed: 0,Age,Rating
1,26,3.24
0,25,4.23
4,30,3.2
3,23,2.56


In [24]:
# loc: selection by index label rather than by position
df.loc[0] # single row (the one labelled 0), all columns

Name       Tom
Age         25
Rating    4.23
Name: 0, dtype: object

In [25]:
# Several rows by label (0, 2 and 4), all columns; rows come back in the order requested.
df.loc[[0, 2, 4]]

Unnamed: 0,Name,Age,Rating
0,Tom,25,4.23
2,Ricky,25,3.98
4,Steve,30,3.2
