# Read Write CSV

In [1]:
import pandas as pd

In [4]:
df = pd.read_csv('weather_data.csv')
df.head(2)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny


In [6]:
#df = pd.read_excel('weather_data.xlsx','Sheet 1')
#df.head(2)

## Reading CSV

In [9]:
df = pd.read_csv('stock_data.csv')
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


#### Remove Extra Header

In [12]:
df = pd.read_csv('stock_data.csv', skiprows=1)  # It skips the rows
df

Unnamed: 0,GOOGL,27.82,87,845,larry page
0,WMT,4.61,484,65,n.a.
1,MSFT,-1,85,64,bill gates
2,RIL,not available,50,1023,mukesh ambani
3,TATA,5.6,-1,n.a.,ratan tata


In [14]:
df = pd.read_csv('stock_data.csv', header=1)  # It also do the same which is telling that my header starts from row 1
df

Unnamed: 0,GOOGL,27.82,87,845,larry page
0,WMT,4.61,484,65,n.a.
1,MSFT,-1,85,64,bill gates
2,RIL,not available,50,1023,mukesh ambani
3,TATA,5.6,-1,n.a.,ratan tata


In [16]:
df = pd.read_csv('stock_data.csv', header=None, names=['co1','col2','col3','col4'])  # Suppose we dont have col names so we can set col names like that too
df

Unnamed: 0,co1,col2,col3,col4
tickers,eps,revenue,price,people
GOOGL,27.82,87,845,larry page
WMT,4.61,484,65,n.a.
MSFT,-1,85,64,bill gates
RIL,not available,50,1023,mukesh ambani
TATA,5.6,-1,n.a.,ratan tata


## Tackling NA values

In [18]:
df = pd.read_csv('stock_data.csv')
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845,larry page
1,WMT,4.61,484,65,n.a.
2,MSFT,-1,85,64,bill gates
3,RIL,not available,50,1023,mukesh ambani
4,TATA,5.6,-1,n.a.,ratan tata


In [19]:
# it means that whenever you encounter these kind of words replace with NAN values
df = pd.read_csv('stock_data.csv', na_values=['not available','n.a.'])
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87,845.0,larry page
1,WMT,4.61,484,65.0,
2,MSFT,-1.0,85,64.0,bill gates
3,RIL,,50,1023.0,mukesh ambani
4,TATA,5.6,-1,,ratan tata


In [20]:
# if we want to replace values with NAN on targeted columns then we can do this too
# in excel we use converter function for this work
df = pd.read_csv('stock_data.csv', na_values={
    'eps':['not available','n.a.'],
    'revenue':['not available','n.a.',-1],
    'people':['not available','n.a.']
})
df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845,larry page
1,WMT,4.61,484.0,65,
2,MSFT,-1.0,85.0,64,bill gates
3,RIL,,50.0,1023,mukesh ambani
4,TATA,5.6,,n.a.,ratan tata


## Write Back to CSV

In [22]:
df.to_csv('new.csv',index=False)  # It save our current df data to csv without index

In [23]:
df.columns

Index(['tickers', 'eps', 'revenue', 'price', 'people'], dtype='object')

In [26]:
df.to_csv("newnew.csv",columns=['tickers','eps']) # it saved a csv with selective columns from our dataframe

## Properties

https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html