###### - Introduction to Pandas
###### - Series & DataFrame
###### - Creation of DataFrames, Exploring Data (Series and DataFrame)
###### - Operations on DataFrames

In [1]:
import pandas as pd
import numpy as np

#### Series

##### - A one-dimensional array like structure along with indices as label.

In [3]:
s1 = pd.Series([34, 67, 90, 34, 12, 67])
s1

0    34
1    67
2    90
3    34
4    12
5    67
dtype: int64

In [5]:
print(s1[0])

34


In [7]:
print(s1[0:3])

0    34
1    67
2    90
dtype: int64


In [8]:
print(s1[0::2])

0    34
2    90
4    12
dtype: int64


#### DataFrame

##### - A multi-dimensional (2D) array like structure along with indices as label.

In [10]:
df = pd.DataFrame()

print(df)

Empty DataFrame
Columns: []
Index: []


In [12]:
df = pd.DataFrame({
    'name': ["vansh", "abhay", "dhruvi"],
    'age': [20, 18, 22],
    'marks': [80, 98, 94],
})

df

Unnamed: 0,name,age,marks
0,vansh,20,80
1,abhay,18,98
2,dhruvi,22,94


In [13]:
df["name"]

0     vansh
1     abhay
2    dhruvi
Name: name, dtype: object

In [None]:
df[["name", "marks"]]

Unnamed: 0,name,marks
0,vansh,80
1,abhay,98
2,dhruvi,94


In [16]:
df["name"][0]

'vansh'

In [None]:
df[["name", "marks"]]

Unnamed: 0,name,marks
0,vansh,80
1,abhay,98
2,dhruvi,94


In [26]:
df['marks']

0    80
1    98
2    94
Name: marks, dtype: int64

In [29]:
df

Unnamed: 0,name,age,marks
0,vansh,20,80
1,abhay,18,98
2,dhruvi,22,94


In [30]:
df["city"] = ["delhi", "mumbai", "pune"]
df

Unnamed: 0,name,age,marks,city
0,vansh,20,80,delhi
1,abhay,18,98,mumbai
2,dhruvi,22,94,pune


In [31]:
df["city"] = ["surat", "baroda", "ahmedabad"]
df

Unnamed: 0,name,age,marks,city
0,vansh,20,80,surat
1,abhay,18,98,baroda
2,dhruvi,22,94,ahmedabad


In [32]:
print(df)

     name  age  marks       city
0   vansh   20     80      surat
1   abhay   18     98     baroda
2  dhruvi   22     94  ahmedabad


In [38]:
df['marks'].dtypes

dtype('int64')

In [None]:
df.dtypes

name     object
age       int64
marks     int64
city     object
dtype: object

In [34]:
type(df)

pandas.core.frame.DataFrame

In [35]:
type(s1)

pandas.core.series.Series

In [39]:
df['marks']

0    80
1    98
2    94
Name: marks, dtype: int64

In [41]:
df['marks'] + 2

0     82
1    100
2     96
Name: marks, dtype: int64

In [42]:
df['marks'] - 2

0    78
1    96
2    92
Name: marks, dtype: int64

In [43]:
df['marks'] * 2

0    160
1    196
2    188
Name: marks, dtype: int64

In [44]:
df['marks'] / 2

0    40.0
1    49.0
2    47.0
Name: marks, dtype: float64

In [45]:
df['marks'] % 2

0    0
1    0
2    0
Name: marks, dtype: int64

In [46]:
df

Unnamed: 0,name,age,marks,city
0,vansh,20,80,surat
1,abhay,18,98,baroda
2,dhruvi,22,94,ahmedabad


In [47]:
df['marks'] = df['marks'] + 2

In [48]:
df

Unnamed: 0,name,age,marks,city
0,vansh,20,82,surat
1,abhay,18,100,baroda
2,dhruvi,22,96,ahmedabad


In [51]:
type(df['marks'])

pandas.core.series.Series

In [None]:
df2 = pd.DataFrame([
    ["aryan", "vansh", "dhruvi"],
    [20, 22, 18],
    [98, 78, 86]
])

df2

Unnamed: 0,0,1,2
0,aryan,vansh,dhruvi
1,20,22,18
2,98,78,86


In [53]:
df3 = pd.DataFrame([
    ["aryan", "vansh", "dhruvi"],
    [20, 22, 18],
    [98, 78, 86]
], index=["name", "age", "marks"])

df3

Unnamed: 0,0,1,2
name,aryan,vansh,dhruvi
age,20,22,18
marks,98,78,86


In [54]:
df3 = pd.DataFrame([
    ["aryan", "vansh", "dhruvi"],
    [20, 22, 18],
    [98, 78, 86]
], columns=["name", "age", "marks"])

df3

Unnamed: 0,name,age,marks
0,aryan,vansh,dhruvi
1,20,22,18
2,98,78,86


In [55]:
type(df3)

pandas.core.frame.DataFrame

In [56]:
type(df3['name'])

pandas.core.series.Series

In [None]:
# iloc & loc

In [2]:
lol = [
    ["Alice", 25, "New York"],
    ["Bob", 30, "London"],
    ["Charlie", 22, "Paris"]
]

df2 = pd.DataFrame(lol, columns=["Name", "Age", "City"])

df2

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,London
2,Charlie,22,Paris


In [3]:
df2["Name"]

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object

In [4]:
df2.Name

0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object

In [5]:
df2.Name[0]

'Alice'

In [6]:
df2

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,London
2,Charlie,22,Paris


In [7]:
df2.iloc[0]

Name       Alice
Age           25
City    New York
Name: 0, dtype: object

In [15]:
df2.iloc[0:2]

Unnamed: 0,Name,Age,City
0,Alice,25,New York
1,Bob,30,London


In [18]:
df2.iloc[0::2]

Unnamed: 0,Name,Age,City
0,Alice,25,New York
2,Charlie,22,Paris


In [19]:
df2.iloc[0:2, 0:2]

Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30


In [8]:
df2.loc[0]

Name       Alice
Age           25
City    New York
Name: 0, dtype: object

In [9]:
# How to set index from existing column

df3 = df2.set_index("Name")

In [10]:
df3

Unnamed: 0_level_0,Age,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,25,New York
Bob,30,London
Charlie,22,Paris


In [11]:
df3.loc["Bob"]

Age         30
City    London
Name: Bob, dtype: object

In [12]:
df3.loc[["Alice", "Charlie"]]

Unnamed: 0_level_0,Age,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,25,New York
Charlie,22,Paris


In [13]:
df3.loc["Alice":"Bob"]

Unnamed: 0_level_0,Age,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,25,New York
Bob,30,London


In [14]:
df3.loc[df3["City"] != "London"]

Unnamed: 0_level_0,Age,City
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,25,New York
Charlie,22,Paris
