In [1]:
import pandas as pd

In [2]:
# Read the weather observations; the path is relative to the working directory.
weather_csv = "./weather.csv"
df = pd.read_csv(weather_csv)
# A bare name on the last line makes Jupyter render the frame.
df

Unnamed: 0,day,temperature,windspeed,event
0,01-01-2021,28.0,4.0,Rain
1,02-02-2021,32.0,3.0,Sunny
2,03-03-2021,34.0,7.0,Cloudy
3,04-04-2021,34.0,8.0,Cloudy
4,05-05-2021,40.0,10.0,Snow


#### Setting the `day` column as the index

In [3]:
# set_index returns a new DataFrame indexed by `day`. The result is only
# displayed here, not assigned, so `df` itself is left unchanged.
df.set_index("day")

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
01-01-2021,28.0,4.0,Rain
02-02-2021,32.0,3.0,Sunny
03-03-2021,34.0,7.0,Cloudy
04-04-2021,34.0,8.0,Cloudy
05-05-2021,40.0,10.0,Snow


#### But the index of `df` itself is not updated (`set_index` returned a new DataFrame)

In [4]:
# Re-display df: it still has the default integer index and `day` as a regular column.
df

Unnamed: 0,day,temperature,windspeed,event
0,01-01-2021,28.0,4.0,Rain
1,02-02-2021,32.0,3.0,Sunny
2,03-03-2021,34.0,7.0,Cloudy
3,04-04-2021,34.0,8.0,Cloudy
4,05-05-2021,40.0,10.0,Snow


#### Setting `day` as the index of `df` itself

In [5]:
# set_index returns a new DataFrame, so bind the result back to `df` to keep
# `day` as the index. Reassignment is used instead of inplace=True: `df` ends
# up with the same index and columns, and the call stays chainable.
df = df.set_index("day")

In [6]:
# Display df to confirm that `day` is now its index.
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
01-01-2021,28.0,4.0,Rain
02-02-2021,32.0,3.0,Sunny
03-03-2021,34.0,7.0,Cloudy
04-04-2021,34.0,8.0,Cloudy
05-05-2021,40.0,10.0,Snow


In [7]:
# Label-based lookup: select the row whose index label (day) is "02-02-2021".
# Exactly one row carries this label, so the result is a Series.
df.loc["02-02-2021"]

temperature     32.0
windspeed        3.0
event          Sunny
Name: 02-02-2021, dtype: object

In [8]:
# Move the current index back into an ordinary column and restore the default
# integer index. The result is bound back to `df` rather than using
# inplace=True; `df` ends up with the same index and columns.
df = df.reset_index()
df

Unnamed: 0,day,temperature,windspeed,event
0,01-01-2021,28.0,4.0,Rain
1,02-02-2021,32.0,3.0,Sunny
2,03-03-2021,34.0,7.0,Cloudy
3,04-04-2021,34.0,8.0,Cloudy
4,05-05-2021,40.0,10.0,Snow


In [9]:
# Index the frame by `event`. Index labels do not have to be unique
# ("Cloudy" appears twice in this data). The result is bound back to `df`
# rather than using inplace=True.
df = df.set_index("event")

In [10]:
# Display df, which is now indexed by `event`.
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,01-01-2021,28.0,4.0
Sunny,02-02-2021,32.0,3.0
Cloudy,03-03-2021,34.0,7.0
Cloudy,04-04-2021,34.0,8.0
Snow,05-05-2021,40.0,10.0


In [11]:
# "Sunny" labels a single row, so .loc returns that row as a Series.
df.loc["Sunny"]

day            02-02-2021
temperature          32.0
windspeed             3.0
Name: Sunny, dtype: object

In [12]:
# "Cloudy" labels two rows, so .loc returns a DataFrame containing both of them.
df.loc["Cloudy"]

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Cloudy,03-03-2021,34.0,7.0
Cloudy,04-04-2021,34.0,8.0


### Reading the stock dataset

In [13]:
# Load the stock data with default parsing. Placeholder strings in the file
# such as "n.a." and "not available" are kept as ordinary text at this point.
stock_df = pd.read_csv("./stock.csv")

In [14]:
# Display the raw frame to inspect the placeholder values before cleaning.
stock_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


#### Handling missing (N.A.) values in the DataFrame

In [15]:
# First step: the strings "n.a." and "not available" mark missing data in this
# file. Passing them as na_values makes read_csv load them as NaN in every column.
missing_markers = ["n.a.", "not available"]
stock_df = pd.read_csv("./stock.csv", na_values=missing_markers)
stock_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845.0,larry page
1,WMT,4.61,484,65.0,
2,MSFT,-1.0,85,64.0,bill gates
3,RIL,,50,1023.0,mukesh ambani
4,TATA,5.6,-1,,ratan tata


#### Treating -1 as a NaN value as well

In [16]:
# Text placeholders plus the numeric sentinel -1. A list of na_values applies
# to every column, so -1 becomes NaN in both `eps` and `revenue`.
missing_markers = ["n.a.", "not available", -1]
stock_df = pd.read_csv("./stock.csv", na_values=missing_markers)
stock_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


#### Treating only particular values as NaN in specific columns of the DataFrame

In [17]:
# Per-column missing-value markers: each key is a column name and its list
# holds the values to load as NaN in that column only. -1 is listed only for
# `revenue`, so the -1 in `eps` is kept as a real value.
na_by_column = {
    "eps": ["not available"],
    "revenue": [-1],
    "price": ["n.a."],
    "people": ["n.a."],
}
stock_df = pd.read_csv("./stock.csv", na_values=na_by_column)
stock_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


#### Saving a copy of the DataFrame as a CSV file without the index column

In [18]:
# Write stock_df to a new CSV file; index=False leaves the row labels out of the file.
stock_df.to_csv(path_or_buf="stock_duplicate.csv", index=False)

In [19]:
# Load the saved copy back in to check the round trip.
duplicate_csv = "./stock_duplicate.csv"
stock_dup_df = pd.read_csv(duplicate_csv)
stock_dup_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata
