# Pandas

In [95]:
import pandas as pd
import numpy as np

In [96]:
# Show the installed pandas version so readers can match it against the outputs recorded in this notebook.
pd.__version__

'2.0.3'

In [97]:
# Column-oriented raw data: each key becomes a column, each list holds that column's values.
df_data = dict(
    Fruits=["apple", "orange", "banana"],
    Unit_Price=[220, 110, 50],
    Quantity=[2, 3, 1],
)

# Build the base frame used by the following cells; rows receive the default 0..n-1 index.
fruit_df = pd.DataFrame(data=df_data)

# Print the frame followed by its Python type on the same output line.
print(fruit_df, type(fruit_df))

   Fruits  Unit_Price  Quantity
0   apple         220         2
1  orange         110         3
2  banana          50         1 <class 'pandas.core.frame.DataFrame'>


In [98]:
# One-record frame whose column names match fruit_df, so the values line up when stacked.
add_row = pd.DataFrame([{"Fruits": "guava", "Unit_Price": 100, "Quantity": 2}])

# axis="index" (alias of axis=0) stacks the frames row-wise;
# ignore_index=True renumbers the rows 0..n-1 instead of keeping each frame's own labels.
fruit_df2 = pd.concat(objs=[fruit_df, add_row], axis="index", ignore_index=True)
fruit_df2

Unnamed: 0,Fruits,Unit_Price,Quantity
0,apple,220,2
1,orange,110,3
2,banana,50,1
3,guava,100,2


In [99]:
# One-record frame that will be placed beside fruit_df rather than below it.
add_row = pd.DataFrame([{"Fruits": "guava", "Unit_Price": 100, "Quantity": 2}])

# axis="columns" (alias of axis=1) joins the frames side by side, aligned on the row index;
# rows present in only one frame are padded with NaN, and ignore_index=True relabels
# the resulting columns 0..n-1.
fruit_df3 = pd.concat(objs=[fruit_df, add_row], axis="columns", ignore_index=True)
print(fruit_df3)

        0    1  2      3      4    5
0   apple  220  2  guava  100.0  2.0
1  orange  110  3    NaN    NaN  NaN
2  banana   50  1    NaN    NaN  NaN


In [100]:
# Export the frame's values as a 2-D NumPy array (one inner array per row).
# dtype=None and copy=False are the method's defaults, spelled out for the reader.
npArr = fruit_df2.to_numpy(dtype=None, copy=False)
print(npArr)

[['apple' 220 2]
 ['orange' 110 3]
 ['banana' 50 1]
 ['guava' 100 2]]


In [101]:
# Iterating a DataFrame yields its column labels, not its rows.
# Each label is printed together with its Python type, separated by "~".
for column_label in fruit_df2.columns:
    print(column_label, type(column_label), sep="~")

Fruits~<class 'str'>
Unit_Price~<class 'str'>
Quantity~<class 'str'>


## Data Structures in Pandas
1. Series

2. DataFrame

In [102]:
# Series from a dict

# The dict keys become the index labels and the dict values become the data.
label_to_value = {"a": 1, "b": 2, "c": 3}
series = pd.Series(label_to_value)

# Show the Series, then its index object, each on its own line.
print(series, series.index, sep="\n")

a    1
b    2
c    3
dtype: int64
Index(['a', 'b', 'c'], dtype='object')


In [103]:
# Series from a ndarray

# Five evenly spaced points covering the closed interval [0, 5].
narr = np.linspace(start=0, stop=5, num=5)

# With no explicit index, the Series is labelled 0..n-1.
series = pd.Series(narr)
print(series)

0    0.00
1    1.25
2    2.50
3    3.75
4    5.00
dtype: float64


In [104]:
# Demonstrate str.endswith by checking the domain suffix of an e-mail address.
# A placeholder on the reserved example.com domain is used so that no personal
# address is stored in the notebook or its outputs; the check still prints True.
string = "user@example.com"
print(string.endswith("@example.com"))

True


## DataFrames

In [105]:
# 20.08.23  (presumably the date this section was written -- TODO confirm)
# Transpose the frame: column labels become the row index and row labels become the columns.
print(fruit_df2.transpose())

                0       1       2      3
Fruits      apple  orange  banana  guava
Unit_Price    220     110      50    100
Quantity        2       3       1      2


In [106]:
# Show the wide frame and its melted (long) form, separated by a blank line.
# With no id_vars given, melt unpivots every column into (variable, value) pairs.
melted_fruits = pd.melt(frame=fruit_df2)
print(fruit_df2, melted_fruits, sep="\n\n")

   Fruits  Unit_Price  Quantity
0   apple         220         2
1  orange         110         3
2  banana          50         1
3   guava         100         2

      variable   value
0       Fruits   apple
1       Fruits  orange
2       Fruits  banana
3       Fruits   guava
4   Unit_Price     220
5   Unit_Price     110
6   Unit_Price      50
7   Unit_Price     100
8     Quantity       2
9     Quantity       3
10    Quantity       1
11    Quantity       2


In [107]:
# Remove the "Quantity" column. drop returns a new frame, which is bound back to fruit_df2.
# NOTE: re-running this cell on its own raises KeyError once the column is already gone.
fruit_df2 = fruit_df2.drop(labels=["Quantity"], axis="columns")
fruit_df2

Unnamed: 0,Fruits,Unit_Price
0,apple,220
1,orange,110
2,banana,50
3,guava,100


In [108]:
# Single-column frame holding the quantities, one value per row of fruit_df2.
quantity_col = pd.DataFrame(data={"Quantity": [2, 3, 1, 2]})

# join aligns the two frames on their row index (left join by default),
# attaching "Quantity" as a new right-most column.
fruit_df2 = fruit_df2.join(other=quantity_col)
fruit_df2

Unnamed: 0,Fruits,Unit_Price,Quantity
0,apple,220,2
1,orange,110,3
2,banana,50,1
3,guava,100,2


In [109]:
# Select a single column by label; the result is a Series named after that column.
fruit_df2.loc[:, "Fruits"]

0     apple
1    orange
2    banana
3     guava
Name: Fruits, dtype: object

In [110]:
# Upper-case every fruit name with the vectorized string accessor.
# Unlike .apply(lambda x: x.upper()), .str.upper() avoids a Python-level call per element
# and passes missing values through instead of raising AttributeError on them.
fruit_df2["Fruits"] = fruit_df2["Fruits"].str.upper()
fruit_df2

Unnamed: 0,Fruits,Unit_Price,Quantity
0,APPLE,220,2
1,ORANGE,110,3
2,BANANA,50,1
3,GUAVA,100,2


In [111]:
# Derive a new column holding twice the quantity of each row.
# Arithmetic on a Series is applied element-wise, so no .apply/lambda is needed.
fruit_df2["New_quantity"] = fruit_df2["Quantity"] * 2
fruit_df2

Unnamed: 0,Fruits,Unit_Price,Quantity,New_quantity
0,APPLE,220,2,4
1,ORANGE,110,3,6
2,BANANA,50,1,2
3,GUAVA,100,2,4


In [112]:
# Average unit price per fruit: group the rows by name, pick the price column, then take the mean.
# The result is a Series indexed by the (sorted) fruit names.
unit_price_by_fruit = fruit_df2.groupby(by="Fruits")["Unit_Price"]
group_fruits = unit_price_by_fruit.mean()
print(group_fruits)

Fruits
APPLE     220.0
BANANA     50.0
GUAVA     100.0
ORANGE    110.0
Name: Unit_Price, dtype: float64


In [113]:
# Append a second APPLE record so that a later group-by has two prices to average.
# Assigning to a row label that does not exist yet makes .loc enlarge the frame; the
# Series is aligned to the frame's columns, so any column left out here becomes NaN.
# "New_quantity" is therefore supplied explicitly, using the same rule as the rest of
# that column (twice the quantity).
# NOTE: each re-run of this cell appends one more row.
new_row = {
    "Fruits": "APPLE",
    "Unit_Price": 110,
    "Quantity": 10,
}
new_row["New_quantity"] = new_row["Quantity"] * 2

# len(fruit_df2) is used as the next free label; this relies on the frame having the
# default 0..n-1 row index.
fruit_df2.loc[len(fruit_df2)] = pd.Series(new_row)

In [115]:
# Recompute the average unit price per fruit now that a second APPLE row exists.
# APPLE's mean becomes the average of its two prices (220 and 110); the other fruits are unchanged.
grouped_by_fruit = fruit_df2.groupby("Fruits")
group_fruits = grouped_by_fruit["Unit_Price"].mean()
print(group_fruits)

Fruits
APPLE     165.0
BANANA     50.0
GUAVA     100.0
ORANGE    110.0
Name: Unit_Price, dtype: float64
