## 2.1 Introduction to Python Libraries

### 2.1.1. Installing Pandas

In [535]:
pip install pandas

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.



### 2.1.2. Data Structure in Pandas

## 2.2 Series

### 2.2.1 Creation of Series

#### (A) Creation of Series from Scalar Values

In [536]:
# import Pandas with alias pd
import pandas as pd

# create a Series
series1 = pd.Series([10, 20, 30])
# Display the series
print(series1)

0    10
1    20
2    30
dtype: int64


In [537]:
series2 = pd.Series(
    ["Kavi", "Shyam", "Ravi"],
    index=[3, 5, 1],
)
# Display the series
print(series2)

3     Kavi
5    Shyam
1     Ravi
dtype: object


In [538]:
series2 = pd.Series(
    [2, 3, 4],
    index=["Feb", "Mar", "Apr"],
)
# Display the series
print(series2)

Feb    2
Mar    3
Apr    4
dtype: int64


#### (B) Creation of Series from NumPy Arrays

In [652]:
# import NumPy with alias np
import numpy as np
import pandas as pd

array1 = np.array([1, 2, 3, 4])
series3 = pd.Series(array1)
print(series3)

0    1
1    2
2    3
3    4
dtype: int64


In [540]:
series4 = pd.Series(
    array1,
    index=["Jan", "Feb", "Mar", "Apr"],
)
print(series4)

Jan    1
Feb    2
Mar    3
Apr    4
dtype: int64


In [653]:
# series5 = pd.Series(
#     array1,
#     index=["Jan", "Feb", "Mar"],
# )

ValueError: Length of values (4) does not match length of index (3)

#### (C) Creation of Series from Dictionary

In [542]:
dict1 = {
    "India": "NewDelhi",
    "UK": "London",
    "Japan": "Tokyo",
}

# Display the dictionary
print(dict1)

{'India': 'NewDelhi', 'UK': 'London', 'Japan': 'Tokyo'}


In [543]:
series8 = pd.Series(dict1)

# Display the series
print(series8)

India    NewDelhi
UK         London
Japan       Tokyo
dtype: object


### 2.2.2 Accessing Elements of a Series

#### (A) Indexing

In [544]:
seriesNum = pd.Series([10, 20, 30])
seriesNum[2]

np.int64(30)

In [545]:
seriesMnths = pd.Series(
    [2, 3, 4],
    index=["Feb", "Mar", "Apr"],
)
seriesMnths["Mar"]

np.int64(3)

In [546]:
seriesCapCntry = pd.Series(
    ["NewDelhi", "WashingtonDC", "London", "Paris"],
    index=["India", "USA", "UK", "France"],
)
seriesCapCntry["India"]

'NewDelhi'

In [547]:
seriesCapCntry[1]

  seriesCapCntry[1]


'WashingtonDC'

In [548]:
seriesCapCntry[[3, 2]]

  seriesCapCntry[[3, 2]]


France     Paris
UK        London
dtype: object

In [549]:
seriesCapCntry[["UK", "USA"]]

UK           London
USA    WashingtonDC
dtype: object

In [550]:
seriesCapCntry.index = [10, 20, 30, 40]
seriesCapCntry

10        NewDelhi
20    WashingtonDC
30          London
40           Paris
dtype: object

#### (B) Slicing

In [551]:
seriesCapCntry = pd.Series(
    ["NewDelhi", "WashingtonDC", "London", "Paris"],
    index=["India", "USA", "UK", "France"],
)

# excludes the value at index position 3
seriesCapCntry[1:3]

USA    WashingtonDC
UK           London
dtype: object

In [552]:
seriesCapCntry["USA":"France"]

USA       WashingtonDC
UK              London
France           Paris
dtype: object

In [553]:
seriesCapCntry[::-1]

France           Paris
UK              London
USA       WashingtonDC
India         NewDelhi
dtype: object

In [554]:
import numpy as np

seriesAlph = pd.Series(
    np.arange(10, 16, 1),
    index=["a", "b", "c", "d", "e", "f"],
)
seriesAlph

a    10
b    11
c    12
d    13
e    14
f    15
dtype: int64

In [555]:
seriesAlph[1:3] = 50
seriesAlph

a    10
b    50
c    50
d    13
e    14
f    15
dtype: int64

In [556]:
seriesAlph["c":"e"] = 500
seriesAlph

a     10
b     50
c    500
d    500
e    500
f     15
dtype: int64

### 2.2.3 Attributes of Series

In [557]:
seriesCapCntry

India         NewDelhi
USA       WashingtonDC
UK              London
France           Paris
dtype: object

###### Table 2.1 Attributes of Pandas Series


In [558]:
seriesCapCntry.name = "Capitals"
print(seriesCapCntry)

India         NewDelhi
USA       WashingtonDC
UK              London
France           Paris
Name: Capitals, dtype: object


In [559]:
seriesCapCntry.index.name = "Countries"
print(seriesCapCntry)

Countries
India         NewDelhi
USA       WashingtonDC
UK              London
France           Paris
Name: Capitals, dtype: object


In [560]:
print(seriesCapCntry.values)

['NewDelhi' 'WashingtonDC' 'London' 'Paris']


In [561]:
print(seriesCapCntry.size)

4


In [562]:
seriesCapCntry.empty

False

In [563]:
# Create an empty series
seriesEmpt = pd.Series()
seriesEmpt.empty

True

### 2.2.4 Methods of Series

In [564]:
seriesTenTwenty = pd.Series(np.arange(10, 20, 1))
print(seriesTenTwenty)

0    10
1    11
2    12
3    13
4    14
5    15
6    16
7    17
8    18
9    19
dtype: int64


###### Activity 2.3

In [565]:
import pandas as pd
import numpy as np

s2 = pd.Series([12, np.nan, 10])
print(s2)

0    12.0
1     NaN
2    10.0
dtype: float64


###### Table

In [566]:
seriesTenTwenty.head(2)

0    10
1    11
dtype: int64

In [567]:
seriesTenTwenty.head()

0    10
1    11
2    12
3    13
4    14
dtype: int64

In [568]:
seriesTenTwenty.count()

np.int64(10)

In [569]:
seriesTenTwenty.tail(2)

8    18
9    19
dtype: int64

In [570]:
seriesTenTwenty.tail()

5    15
6    16
7    17
8    18
9    19
dtype: int64

### 2.2.5 Mathematical Operations on Series

In [571]:
seriesA = pd.Series(
    [1, 2, 3, 4, 5],
    index=["a", "b", "c", "d", "e"],
)
seriesA

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [572]:
seriesB = pd.Series(
    [10, 20, -10, -50, 100],
    index=["z", "y", "a", "c", "e"],
)
seriesB

z     10
y     20
a    -10
c    -50
e    100
dtype: int64

#### (A) Addition of two Series

In [573]:
seriesA + seriesB

a     -9.0
b      NaN
c    -47.0
d      NaN
e    105.0
y      NaN
z      NaN
dtype: float64

In [574]:
seriesA.add(seriesB, fill_value=0)

a     -9.0
b      2.0
c    -47.0
d      4.0
e    105.0
y     20.0
z     10.0
dtype: float64

#### (B) Subtraction of two Series

In [575]:
# using subtraction operator
seriesA - seriesB

a    11.0
b     NaN
c    53.0
d     NaN
e   -95.0
y     NaN
z     NaN
dtype: float64

In [576]:
# using fill value 1000 while making
# explicit call of the method
seriesA.sub(seriesB, fill_value=1000)

a     11.0
b   -998.0
c     53.0
d   -996.0
e    -95.0
y    980.0
z    990.0
dtype: float64

#### (C) Multiplication of two Series

In [577]:
# using multiplication operator
seriesA * seriesB

a    -10.0
b      NaN
c   -150.0
d      NaN
e    500.0
y      NaN
z      NaN
dtype: float64

In [578]:
# using fill value 0 while making
# explicit call of the method
seriesA.mul(seriesB, fill_value=0)

a    -10.0
b      0.0
c   -150.0
d      0.0
e    500.0
y      0.0
z      0.0
dtype: float64

#### (D) Division of two Series

In [579]:
# using division operator
seriesA / seriesB

a   -0.10
b     NaN
c   -0.06
d     NaN
e    0.05
y     NaN
z     NaN
dtype: float64

In [580]:
# using fill value 0 while making
# explicit call of the method
seriesA.div(seriesB, fill_value=0)

a   -0.10
b     inf
c   -0.06
d     inf
e    0.05
y    0.00
z    0.00
dtype: float64

## 2.3 dataFrame

### 2.3.1 Creation of DataFrame

#### (A) Creation of an empty DataFrame

In [581]:
import pandas as pd

dFrameEmt = pd.DataFrame()
print(dFrameEmt)

Empty DataFrame
Columns: []
Index: []


#### (B) Creation of DataFrame from NumPy ndarrays

In [582]:
import numpy as np

array1 = np.array([10, 20, 30])
array2 = np.array([100, 200, 300])
array3 = np.array([-10, -20, -30, -40])
dFrame4 = pd.DataFrame(array1)
dFrame4

Unnamed: 0,0
0,10
1,20
2,30


In [583]:
dFrame5 = pd.DataFrame(
    [array1, array3, array2],
    columns=["A", "B", "C", "D"],
)
dFrame5

Unnamed: 0,A,B,C,D
0,10,20,30,
1,-10,-20,-30,-40.0
2,100,200,300,


#### (C) Creation of DataFrame from List of Dictionaries

In [584]:
# Create list of dictionaries
listDict = [
    {"a": 10, "b": 20},
    {"a": 5, "b": 10, "c": 20},
]
dFrameListDict = pd.DataFrame(listDict)
dFrameListDict

Unnamed: 0,a,b,c
0,10,20,
1,5,10,20.0


#### (D) Creation of DataFrame from Dictionary of Lists

In [585]:
dictForest = {
    "State": ["Assam", "Delhi", "Kerala"],
    "GArea": [78438, 1483, 38852],
    "VDF": [2797, 6.72, 1663],
}
dFrameForest = pd.DataFrame(dictForest)
dFrameForest

Unnamed: 0,State,GArea,VDF
0,Assam,78438,2797.0
1,Delhi,1483,6.72
2,Kerala,38852,1663.0


In [586]:
dFrameForest1 = pd.DataFrame(
    dictForest,
    columns=["State", "VDF", "GArea"],
)
dFrameForest1

Unnamed: 0,State,VDF,GArea
0,Assam,2797.0,78438
1,Delhi,6.72,1483
2,Kerala,1663.0,38852


#### (E) Creation of DataFrame from Series

In [587]:
seriesA = pd.Series(
    [1, 2, 3, 4, 5],
    index=["a", "b", "c", "d", "e"],
)
seriesB = pd.Series(
    [1000, 2000, -1000, -5000, 1000],
    index=["a", "b", "c", "d", "e"],
)
seriesC = pd.Series(
    [10, 20, -10, -50, 100],
    index=["z", "y", "a", "c", "e"],
)

In [588]:
dFrame6 = pd.DataFrame(seriesA)
dFrame6

Unnamed: 0,0
a,1
b,2
c,3
d,4
e,5


In [589]:
dFrame7 = pd.DataFrame([seriesA, seriesB])
dFrame7

Unnamed: 0,a,b,c,d,e
0,1,2,3,4,5
1,1000,2000,-1000,-5000,1000


In [590]:
dFrame8 = pd.DataFrame([seriesA, seriesC])
dFrame8

Unnamed: 0,a,b,c,d,e,z,y
0,1.0,2.0,3.0,4.0,5.0,,
1,-10.0,,-50.0,,100.0,10.0,20.0


#### (F) Creation of DataFrame from Dictionary of Series

In [591]:
ResultSheet = {
    "Arnab": pd.Series(
        [90, 91, 97],
        index=["Maths", "Science", "Hindi"],
    ),
    "Ramit": pd.Series(
        [92, 81, 96],
        index=["Maths", "Science", "Hindi"],
    ),
    "Samridhi": pd.Series(
        [89, 91, 88],
        index=["Maths", "Science", "Hindi"],
    ),
    "Riya": pd.Series(
        [81, 71, 67],
        index=["Maths", "Science", "Hindi"],
    ),
    "Mallika": pd.Series(
        [94, 95, 99],
        index=["Maths", "Science", "Hindi"],
    ),
}

In [592]:
ResultDF = pd.DataFrame(ResultSheet)
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99


In [593]:
type(ResultDF.Arnab)

pandas.core.series.Series

In [594]:
dictForUnion = {
    "Series1": pd.Series(
        [1, 2, 3, 4, 5],
        index=["a", "b", "c", "d", "e"],
    ),
    "Series2": pd.Series(
        [10, 20, -10, -50, 100],
        index=["z", "y", "a", "c", "e"],
    ),
    "Series3": pd.Series(
        [10, 20, -10, -50, 100],
        index=["z", "y", "a", "c", "e"],
    ),
}

In [595]:
dFrameUnion = pd.DataFrame(dictForUnion)
dFrameUnion

Unnamed: 0,Series1,Series2,Series3
a,1.0,-10.0,-10.0
b,2.0,,
c,3.0,-50.0,-50.0
d,4.0,,
e,5.0,100.0,100.0
y,,20.0,20.0
z,,10.0,10.0


### 2.3.2 Operations on rows and columns in DataFrames

#### (A) Adding a New Column to a DataFrame

In [596]:
# Reset ResultDF
ResultDF = pd.DataFrame(ResultSheet)

ResultDF["Preeti"] = [89, 78, 76]
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika,Preeti
Maths,90,92,89,81,94,89
Science,91,81,91,71,95,78
Hindi,97,96,88,67,99,76


In [597]:
ResultDF["Ramit"] = [99, 98, 78]
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika,Preeti
Maths,90,99,89,81,94,89
Science,91,98,91,71,95,78
Hindi,97,78,88,67,99,76


In [598]:
ResultDF["Arnab"] = 90
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika,Preeti
Maths,90,99,89,81,94,89
Science,90,98,91,71,95,78
Hindi,90,78,88,67,99,76


#### (B) Adding a New Row to a DataFrame

In [599]:
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika,Preeti
Maths,90,99,89,81,94,89
Science,90,98,91,71,95,78
Hindi,90,78,88,67,99,76


In [600]:
ResultDF.loc["English"] = [85, 86, 83, 80, 90, 89]
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika,Preeti
Maths,90,99,89,81,94,89
Science,90,98,91,71,95,78
Hindi,90,78,88,67,99,76
English,85,86,83,80,90,89


In [601]:
ResultDF.loc["English"] = [95, 86, 95, 80, 95, 99]
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika,Preeti
Maths,90,99,89,81,94,89
Science,90,98,91,71,95,78
Hindi,90,78,88,67,99,76
English,95,86,95,80,95,99


In [602]:
ResultDF.loc["Maths"] = 0
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika,Preeti
Maths,0,0,0,0,0,0
Science,90,98,91,71,95,78
Hindi,90,78,88,67,99,76
English,95,86,95,80,95,99


In [603]:
# Set all values in ResultDF to 0
ResultDF[:] = 0
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika,Preeti
Maths,0,0,0,0,0,0
Science,0,0,0,0,0,0
Hindi,0,0,0,0,0,0
English,0,0,0,0,0,0


#### (C) Deleting Rows or Columns from a DataFrame

In [604]:
# Reset ResultDF
ResultDF = pd.DataFrame(
    {
        "Arnab": pd.Series(
            [90, 91, 97, 95],
            index=["Maths", "Science", "Hindi", "English"],
        ),
        "Ramit": pd.Series(
            [92, 81, 96, 86],
            index=["Maths", "Science", "Hindi", "English"],
        ),
        "Samridhi": pd.Series(
            [89, 91, 88, 95],
            index=["Maths", "Science", "Hindi", "English"],
        ),
        "Riya": pd.Series(
            [81, 71, 67, 80],
            index=["Maths", "Science", "Hindi", "English"],
        ),
        "Mallika": pd.Series(
            [94, 95, 99, 95],
            index=["Maths", "Science", "Hindi", "English"],
        ),
    }
)


ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99
English,95,86,95,80,95


In [605]:
ResultDF = ResultDF.drop("Science", axis=0)
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika
Maths,90,92,89,81,94
Hindi,97,96,88,67,99
English,95,86,95,80,95


In [606]:
ResultDF = ResultDF.drop(
    ["Samridhi", "Ramit", "Riya"],
    axis=1,
)
ResultDF

Unnamed: 0,Arnab,Mallika
Maths,90,94
Hindi,97,99
English,95,95


In [607]:
# Reset ResultDF
ResultDF = pd.DataFrame(
    {
        "Arnab": pd.Series(
            [90, 91, 97, 97],
            index=["Maths", "Science", "Hindi", "Hindi"],
        ),
        "Ramit": pd.Series(
            [92, 81, 96, 86],
            index=["Maths", "Science", "Hindi", "Hindi"],
        ),
        "Samridhi": pd.Series(
            [89, 91, 88, 78],
            index=["Maths", "Science", "Hindi", "Hindi"],
        ),
        "Riya": pd.Series(
            [81, 71, 67, 60],
            index=["Maths", "Science", "Hindi", "Hindi"],
        ),
        "Mallika": pd.Series(
            [94, 95, 99, 45],
            index=["Maths", "Science", "Hindi", "Hindi"],
        ),
    }
)

ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99
Hindi,97,86,78,60,45


In [608]:
ResultDF = ResultDF.drop("Hindi", axis=0)
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika
Maths,90,92,89,81,94
Science,91,81,91,71,95


#### (D) Renaming Row Labels of a DataFrame

In [609]:
# Reset ResultDF
ResultDF = pd.DataFrame(
    {
        "Arnab": pd.Series(
            [90, 91, 97, 97],
            index=["Maths", "Science", "English", "Hindi"],
        ),
        "Ramit": pd.Series(
            [92, 81, 96, 86],
            index=["Maths", "Science", "English", "Hindi"],
        ),
        "Samridhi": pd.Series(
            [89, 91, 88, 78],
            index=["Maths", "Science", "English", "Hindi"],
        ),
        "Riya": pd.Series(
            [81, 71, 67, 60],
            index=["Maths", "Science", "English", "Hindi"],
        ),
        "Mallika": pd.Series(
            [94, 95, 99, 45],
            index=["Maths", "Science", "English", "Hindi"],
        ),
    }
)

ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika
Maths,90,92,89,81,94
Science,91,81,91,71,95
English,97,96,88,67,99
Hindi,97,86,78,60,45


In [610]:
ResultDF = ResultDF.rename(
    {
        "Maths": "Sub1",
        "Science": "Sub2",
        "English": "Sub3",
        "Hindi": "Sub4",
    },
    axis="index",
)
print(ResultDF)

      Arnab  Ramit  Samridhi  Riya  Mallika
Sub1     90     92        89    81       94
Sub2     91     81        91    71       95
Sub3     97     96        88    67       99
Sub4     97     86        78    60       45


In [611]:
ResultDF = ResultDF.rename(
    {
        "Arnab": "Student1",
        "Ramit": "Student2",
        "Samridhi": "Student3",
        "Mallika": "Student4",
    },
    axis="columns",
)
print(ResultDF)

      Student1  Student2  Student3  Riya  Student4
Sub1        90        92        89    81        94
Sub2        91        81        91    71        95
Sub3        97        96        88    67        99
Sub4        97        86        78    60        45


#### (E) Renaming Column Labels of a DataFrame

In [612]:
# Reset ResultDF
ResultDF = pd.DataFrame(
    {
        "Arnab": pd.Series(
            [90, 91, 97, 97],
            index=["Maths", "Science", "English", "Hindi"],
        ),
        "Ramit": pd.Series(
            [92, 81, 96, 86],
            index=["Maths", "Science", "English", "Hindi"],
        ),
        "Samridhi": pd.Series(
            [89, 91, 88, 78],
            index=["Maths", "Science", "English", "Hindi"],
        ),
        "Riya": pd.Series(
            [81, 71, 67, 60],
            index=["Maths", "Science", "English", "Hindi"],
        ),
        "Mallika": pd.Series(
            [94, 95, 99, 45],
            index=["Maths", "Science", "English", "Hindi"],
        ),
    }
)

ResultDF = ResultDF.rename(
    {
        "Arnab": "Student1",
        "Ramit": "Student2",
        "Samridhi": "Student3",
        "Mallika": "Student4",
    },
    axis="columns",
)
print(ResultDF)

         Student1  Student2  Student3  Riya  Student4
Maths          90        92        89    81        94
Science        91        81        91    71        95
English        97        96        88    67        99
Hindi          97        86        78    60        45


### 2.3.3 Accessing DataFrames Element through Indexing

#### (A) Label Based Indexing

In [613]:
# Reset ResultDF
ResultDF = pd.DataFrame(ResultSheet)

ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99


In [614]:
ResultDF.loc["Science"]

Arnab       91
Ramit       81
Samridhi    91
Riya        71
Mallika     95
Name: Science, dtype: int64

In [615]:
dFrame10Multiples = pd.DataFrame([10, 20, 30, 40, 50])
dFrame10Multiples.loc[2]

0    30
Name: 2, dtype: int64

In [616]:
ResultDF.loc[:, "Arnab"]

Maths      90
Science    91
Hindi      97
Name: Arnab, dtype: int64

In [617]:
print(ResultDF["Arnab"])

Maths      90
Science    91
Hindi      97
Name: Arnab, dtype: int64


In [618]:
ResultDF.loc[["Science", "Hindi"]]

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika
Science,91,81,91,71,95
Hindi,97,96,88,67,99


#### (B) Boolean Indexing

In [619]:
ResultDF.loc["Maths"] > 90

Arnab       False
Ramit        True
Samridhi    False
Riya        False
Mallika      True
Name: Maths, dtype: bool

In [620]:
ResultDF.loc[:, "Arnab"] > 90

Maths      False
Science     True
Hindi       True
Name: Arnab, dtype: bool

### 2.3.4 Accessing DataFrames Element through Slicing

In [621]:
ResultDF.loc["Maths":"Science"]

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika
Maths,90,92,89,81,94
Science,91,81,91,71,95


In [622]:
ResultDF.loc["Maths":"Science", "Arnab"]

Maths      90
Science    91
Name: Arnab, dtype: int64

In [623]:
ResultDF.loc["Maths":"Science", "Arnab":"Samridhi"]

Unnamed: 0,Arnab,Ramit,Samridhi
Maths,90,92,89
Science,91,81,91


In [624]:
ResultDF.loc["Maths":"Science", ["Arnab", "Samridhi"]]

Unnamed: 0,Arnab,Samridhi
Maths,90,89
Science,91,91


#### Filtering Rows in DataFrames

In [625]:
ResultDF.loc[[True, False, True]]

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika
Maths,90,92,89,81,94
Hindi,97,96,88,67,99


### 2.3.5 Joining, Merging and Concatenation of DataFrames

#### (A) Joining

In [626]:
dFrame1 = pd.DataFrame(
    [[1, 2, 3], [4, 5], [6]],
    columns=["C1", "C2", "C3"],
    index=["R1", "R2", "R3"],
)
dFrame1

Unnamed: 0,C1,C2,C3
R1,1,2.0,3.0
R2,4,5.0,
R3,6,,


In [627]:
dFrame2 = pd.DataFrame(
    [[10, 20], [30], [40, 50]],
    columns=["C2", "C5"],
    index=["R4", "R2", "R5"],
)
dFrame2

Unnamed: 0,C2,C5
R4,10,20.0
R2,30,
R5,40,50.0


In [628]:
# dFrame1 = dFrame1.append(dFrame2)

dFrame1 = pd.concat([dFrame1, dFrame2])
dFrame1

Unnamed: 0,C1,C2,C3,C5
R1,1.0,2.0,3.0,
R2,4.0,5.0,,
R3,6.0,,,
R4,,10.0,,20.0
R2,,30.0,,
R5,,40.0,,50.0


In [629]:
# Reset dFrame1 and dFrame2
dFrame1 = pd.DataFrame(
    [[1, 2, 3], [4, 5], [6]],
    columns=["C1", "C2", "C3"],
    index=["R1", "R2", "R3"],
)
dFrame2 = pd.DataFrame(
    [[10, 20], [30], [40, 50]],
    columns=["C2", "C5"],
    index=["R4", "R2", "R5"],
)

# append dFrame1 to dFrame2
# dFrame2 = dFrame2.append(dFrame1, sort=True)

# concatenate dFrame1 to dFrame2
dFrame2 = pd.concat([dFrame2, dFrame1], sort=True)
dFrame2

Unnamed: 0,C1,C2,C3,C5
R4,,10.0,,20.0
R2,,30.0,,
R5,,40.0,,50.0
R1,1.0,2.0,3.0,
R2,4.0,5.0,,
R3,6.0,,,


In [630]:
# Reset dFrame1 and dFrame2
dFrame1 = pd.DataFrame(
    [[1, 2, 3], [4, 5], [6]],
    columns=["C1", "C2", "C3"],
    index=["R1", "R2", "R3"],
)
dFrame2 = pd.DataFrame(
    [[10, 20], [30], [40, 50]],
    columns=["C2", "C5"],
    index=["R4", "R2", "R5"],
)

# append dFrame1 to dFrame2 with sort=False
# dFrame2 = dFrame2.append(dFrame1, sort=True)

# concatenate dFrame1 to dFrame2 with sort=False
dFrame2 = pd.concat([dFrame2, dFrame1], sort=False)
dFrame2

Unnamed: 0,C2,C5,C1,C3
R4,10.0,20.0,,
R2,30.0,,,
R5,40.0,50.0,,
R1,2.0,,1.0,3.0
R2,5.0,,4.0,
R3,,,6.0,


In [631]:
# Reset dFrame1 and dFrame2
dFrame1 = pd.DataFrame(
    [[1, 2, 3], [4, 5], [6]],
    columns=["C1", "C2", "C3"],
    index=["R1", "R2", "R3"],
)
dFrame2 = pd.DataFrame(
    [[10, 20], [30], [40, 50]],
    columns=["C2", "C5"],
    index=["R4", "R2", "R5"],
)

# dFrame1 = dFrame1.append(dFrame2, ignore_index=True)

dFrame1 = pd.concat([dFrame1, dFrame2], ignore_index=True)
dFrame1

Unnamed: 0,C1,C2,C3,C5
0,1.0,2.0,3.0,
1,4.0,5.0,,
2,6.0,,,
3,,10.0,,20.0
4,,30.0,,
5,,40.0,,50.0


### 2.3.6 Attributes of DataFrames

In [632]:
ForestArea = {
    "Assam": pd.Series(
        [78438, 2797, 10192, 15116],
        index=["GeoArea", "VeryDense", "ModeratelyDense", "OpenForest"],
    ),
    "Kerala": pd.Series(
        [38852, 1663, 9407, 9251],
        index=["GeoArea", "VeryDense", "ModeratelyDense", "OpenForest"],
    ),
    "Delhi": pd.Series(
        [1483, 6.72, 56.24, 129.45],
        index=["GeoArea", "VeryDense", "ModeratelyDense", "OpenForest"],
    ),
}

In [633]:
ForestAreaDF = pd.DataFrame(ForestArea)
ForestAreaDF

Unnamed: 0,Assam,Kerala,Delhi
GeoArea,78438,38852,1483.0
VeryDense,2797,1663,6.72
ModeratelyDense,10192,9407,56.24
OpenForest,15116,9251,129.45


###### Table 2.4 Some Attributes of Pandas DataFrame


In [634]:
ForestAreaDF.index

Index(['GeoArea', 'VeryDense', 'ModeratelyDense', 'OpenForest'], dtype='object')

In [635]:
ForestAreaDF.columns

Index(['Assam', 'Kerala', 'Delhi'], dtype='object')

In [636]:
ForestAreaDF.dtypes

Assam       int64
Kerala      int64
Delhi     float64
dtype: object

In [637]:
ForestAreaDF.values

array([[7.8438e+04, 3.8852e+04, 1.4830e+03],
       [2.7970e+03, 1.6630e+03, 6.7200e+00],
       [1.0192e+04, 9.4070e+03, 5.6240e+01],
       [1.5116e+04, 9.2510e+03, 1.2945e+02]])

In [638]:
ForestAreaDF.shape

(4, 3)

In [639]:
ForestAreaDF.size

12

In [640]:
ForestAreaDF.T

Unnamed: 0,GeoArea,VeryDense,ModeratelyDense,OpenForest
Assam,78438.0,2797.0,10192.0,15116.0
Kerala,38852.0,1663.0,9407.0,9251.0
Delhi,1483.0,6.72,56.24,129.45


In [641]:
ForestAreaDF.head(2)

Unnamed: 0,Assam,Kerala,Delhi
GeoArea,78438,38852,1483.0
VeryDense,2797,1663,6.72


In [642]:
ForestAreaDF.tail(2)

Unnamed: 0,Assam,Kerala,Delhi
ModeratelyDense,10192,9407,56.24
OpenForest,15116,9251,129.45


In [643]:
ForestAreaDF.empty

False

In [644]:
# Create an empty dataFrame
df = pd.DataFrame()
df.empty

True

## 2.4 Importing and Exporting data between CSV Files and dataFrames

### 2.4.1 Importing a CSV file to a DataFrame

In [645]:
marks = pd.read_csv(
    "C:/NCERT/ResultData.csv",
    sep=",",
    header=0,
)
marks

Unnamed: 0,RollNo,Name,Eco,Maths
0,1,Arnab,18,57
1,2,Kritika,23,45
2,3,Divyam,51,37
3,4,Vivaan,40,60
4,5,Aaroosh,18,27


In [646]:
marks1 = pd.read_csv(
    "C:/NCERT/ResultData.csv",
    sep=",",
    names=["RNo", "StudentName", "Sub1", "Sub2"],
)
marks1

Unnamed: 0,RNo,StudentName,Sub1,Sub2
0,RollNo,Name,Eco,Maths
1,1,Arnab,18,57
2,2,Kritika,23,45
3,3,Divyam,51,37
4,4,Vivaan,40,60
5,5,Aaroosh,18,27


### 2.4.2 Exporting a DataFrame to a CSV file

In [647]:
ResultDF

Unnamed: 0,Arnab,Ramit,Samridhi,Riya,Mallika
Maths,90,92,89,81,94
Science,91,81,91,71,95
Hindi,97,96,88,67,99


In [648]:
ResultDF.to_csv(
    path_or_buf="C:/NCERT/resultout.csv",
    sep=",",
)

In [649]:
ResultDF.to_csv(
    "C:/NCERT/resultonly.txt",
    sep="@",
    header=False,
    index=False,
)

## 2.5 Pandas series Vs numPy ndarray