In [5]:
#importing Pandas library
import pandas as pd

In [12]:
#sample input for a pandas DataFrame: each dict key becomes a column header
#and each list holds that column's four values
cars_sold = [78, 92, 109, 186]
bikes_sold = [51, 71, 88, 110]
data = {
    'Cars Sold (thousands)': cars_sold,
    'Bikes Sold (thousands)': bikes_sold,
}

In [14]:
#build a DataFrame from the dict; no index is passed here, so pandas numbers the rows itself
vehicles = pd.DataFrame(data=data)

In [15]:
#build the same table again, this time labelling each row with a custom index value
#(the labels are strings, so rows are later looked up with '2014', not 2014)
year_labels = ['2010', '2014', '2018', '2022']
purchases = pd.DataFrame(data, index=year_labels)
purchases

Unnamed: 0,Cars Sold (thousands),Bikes Sold (thousands)
2010,78,51
2014,92,71
2018,109,88
2022,186,110


In [16]:
#look up a single record (row) by its index label with .loc
row_label = '2014'
purchases.loc[row_label]

Cars Sold (thousands)     92
Bikes Sold (thousands)    71
Name: 2014, dtype: int64

In [22]:
#access multiple records (rows) by integer position with .iloc;
#the slice covers positions 0 to 3 (the stop position 4 is excluded)
purchases.iloc[:4]

Unnamed: 0,Cars Sold (thousands),Bikes Sold (thousands)
2010,78,51
2014,92,71
2018,109,88
2022,186,110


In [23]:
#create a new one-row DataFrame to demonstrate the use of 'inplace'
car_records = [{"Car Company": "BMW", "Car Model": "X325i"}]
car_market = pd.DataFrame(car_records)
car_market

Unnamed: 0,Car Company,Car Model
0,BMW,X325i


In [24]:
#with inplace=True the replacement is applied to car_market itself,
#so displaying car_market afterwards shows the new value
model_rename = {"X325i": "M5"}
car_market.replace(to_replace=model_rename, inplace=True)
car_market

Unnamed: 0,Car Company,Car Model
0,BMW,M5


In [40]:
#with inplace=False, replace() leaves car_market untouched and returns a changed copy,
#which is kept here under a new name so both versions can be compared
model_rename = {"M5": "M7"}
new_car_market = car_market.replace(to_replace=model_rename, inplace=False)
print('Original Dataframe (unchanged)', car_market, sep='\n')
print('\nNew Copied Dataframe', new_car_market, sep='\n')

Original Dataframe (unchanged)
  Car Company Car Model
0         BMW        M5

New Copied Dataframe
  Car Company Car Model
0         BMW        M7


In [42]:
#read a CSV file into a pandas DataFrame (the path is relative to the current working directory)
csv_path = './covid-pak-data.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,City,Latitude,Longitude,Total Cases,Avg Growth Rate,Doubling Rate (Days)
0,Lahore,31.52,74.35,50111,0.10%,694.2
1,Rawalpindi,33.56,73.01,8093,0.16%,433.9
2,Multan,32.49,74.52,6135,0.10%,689.8
3,Faisalabad,31.45,73.13,6102,0.06%,1136.3


In [44]:
#print the complete DataFrame even when it has many values.
#display() on its own still obeys pandas' display limits, so a long or wide frame
#would be shortened; lift the row and column limits for this one call only
#(option_context restores the previous settings when the block ends)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df)

Unnamed: 0,City,Latitude,Longitude,Total Cases,Avg Growth Rate,Doubling Rate (Days)
0,Lahore,31.52,74.35,50111,0.10%,694.2
1,Rawalpindi,33.56,73.01,8093,0.16%,433.9
2,Multan,32.49,74.52,6135,0.10%,689.8
3,Faisalabad,31.45,73.13,6102,0.06%,1136.3


In [48]:
#show only the first 2 records of the DataFrame (head() returns the first 5 when no count is given)
df.head(n=2)

Unnamed: 0,City,Latitude,Longitude,Total Cases,Avg Growth Rate,Doubling Rate (Days)
0,Lahore,31.52,74.35,50111,0.10%,694.2
1,Rawalpindi,33.56,73.01,8093,0.16%,433.9


In [47]:
#show only the last 2 records of the DataFrame (tail() returns the last 5 when no count is given)
df.tail(n=2)

Unnamed: 0,City,Latitude,Longitude,Total Cases,Avg Growth Rate,Doubling Rate (Days)
2,Multan,32.49,74.52,6135,0.10%,689.8
3,Faisalabad,31.45,73.13,6102,0.06%,1136.3


In [49]:
#print information about the DataFrame: index range, column names,
#non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   City                  4 non-null      object 
 1   Latitude              4 non-null      float64
 2   Longitude             4 non-null      float64
 3   Total Cases           4 non-null      int64  
 4   Avg Growth Rate       4 non-null      object 
 5   Doubling Rate (Days)  4 non-null      float64
dtypes: float64(3), int64(1), object(2)
memory usage: 320.0+ bytes


In [51]:
#show the shape of the DataFrame as (rows, columns);
#shape is an attribute, not a method, so it is written without () at the end
df.shape

(4, 6)

In [53]:
#select a whole column by its column header
df['City']

0        Lahore
1    Rawalpindi
2        Multan
3    Faisalabad
Name: City, dtype: object

In [54]:
#list all column names of the DataFrame
df.columns

Index(['City', 'Latitude', 'Longitude', 'Total Cases', 'Avg Growth Rate',
       'Doubling Rate (Days)'],
      dtype='object')

In [55]:
#check the type of a single column selected with one label
city_column = df['City']
type(city_column)

pandas.core.series.Series

In [56]:
#check the type of a multi-column selection (a list of labels inside the brackets)
selected_columns = ['City', 'Total Cases']
type(df[selected_columns])

pandas.core.frame.DataFrame

In [58]:
#format text data from the DataFrame: lower-case every value of the 'City' column.
#the result is only displayed, not assigned, so df itself keeps the original values
df['City'].str.lower()

0        lahore
1    rawalpindi
2        multan
3    faisalabad
Name: City, dtype: object

In [61]:
#sort the rows by any column, here 'Total Cases' from largest to smallest;
#the sorted result is only displayed, not assigned back to df
df.sort_values("Total Cases", ascending=False)

Unnamed: 0,City,Latitude,Longitude,Total Cases,Avg Growth Rate,Doubling Rate (Days)
0,Lahore,31.52,74.35,50111,0.10%,694.2
1,Rawalpindi,33.56,73.01,8093,0.16%,433.9
2,Multan,32.49,74.52,6135,0.10%,689.8
3,Faisalabad,31.45,73.13,6102,0.06%,1136.3


In [68]:
#apply arithmetic operations on numerical columns: the two columns are multiplied row by row
total_cases = df['Total Cases']
doubling_days = df['Doubling Rate (Days)']
total_cases * doubling_days

0    34787056.2
1     3511552.7
2     4231923.0
3     6933702.6
dtype: float64

In [70]:
#compute summary statistics (aggregations) over the 'Total Cases' column
total_cases = df['Total Cases']
mean, median = total_cases.mean(), total_cases.median()
min_val, sum_val = total_cases.min(), total_cases.sum()

#report each statistic on its own line
print(f'mean = {mean}')
print(f'median = {median}')
print(f'min_val = {min_val}')
print(f'sum_val = {sum_val}')

mean = 17610.25
median = 7114.0
min_val = 6102
sum_val = 70441


In [71]:
#filter rows with a logical condition: the comparison yields one True/False per row,
#and indexing df with it keeps only the rows where 'Total Cases' is above 8000
high_case_mask = df['Total Cases'] > 8000
df[high_case_mask]

Unnamed: 0,City,Latitude,Longitude,Total Cases,Avg Growth Rate,Doubling Rate (Days)
0,Lahore,31.52,74.35,50111,0.10%,694.2
1,Rawalpindi,33.56,73.01,8093,0.16%,433.9


In [72]:
#convert a column of the DataFrame to a plain Python list
df['Total Cases'].tolist()

[50111, 8093, 6135, 6102]