# Reading and writing Data

#### Reading files

In [1]:
import pandas as pd

In [4]:
df = pd.read_csv("covid19_dataset.csv")
df.head()

Unnamed: 0,Serial Number,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,22/01/2020,,Japan,1/22/2020 12:00,2,0,0
1,2,22/01/2020,,Thailand,1/22/2020 12:00,2,0,0
2,3,22/01/2020,,South Korea,1/22/2020 12:00,1,0,0
3,4,23/01/2020,Anhui,Mainland China,1/23/2020 12:00,9,0,0
4,5,23/01/2020,Hong Kong,Hong Kong,1/23/2020 12:00,2,0,0


#### used read_table and specified the delimiter

In [5]:
df = pd.read_table("covid19_dataset.csv", sep=',')
df.head()

Unnamed: 0,Serial Number,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,22/01/2020,,Japan,1/22/2020 12:00,2,0,0
1,2,22/01/2020,,Thailand,1/22/2020 12:00,2,0,0
2,3,22/01/2020,,South Korea,1/22/2020 12:00,1,0,0
3,4,23/01/2020,Anhui,Mainland China,1/23/2020 12:00,9,0,0
4,5,23/01/2020,Hong Kong,Hong Kong,1/23/2020 12:00,2,0,0


#### allow pandas to assign default column names, or you can specify names yourself

In [4]:
df = pd.read_csv("covid19_dataset.csv", header=None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,Serial Number,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
1,1,22/01/2020,NU,Japan,1/22/2020 12:00,2,0,0
2,2,22/01/2020,NU,NU,1/22/2020 12:00,NU,0,0
3,3,NU,NU,South Korea,1/22/2020 12:00,1,0,0
4,4,23/01/2020,Anhui,Mainland China,1/23/2020 12:00,9,0,0


In [7]:
df.columns =['a','b','c','d','e','f','h','j']

In [8]:
df

Unnamed: 0,a,b,c,d,e,f,h,j
0,Serial Number,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
1,1,22/01/2020,NU,Japan,1/22/2020 12:00,2,0,0
2,2,22/01/2020,NU,NU,1/22/2020 12:00,NU,0,0
3,3,NU,NU,South Korea,1/22/2020 12:00,1,0,0
4,4,23/01/2020,Anhui,Mainland China,1/23/2020 12:00,9,0,0
...,...,...,...,...,...,...,...,...
629,629,15/02/2020,,Nepal,12/02/2020 14:43,1,0,1
630,630,15/02/2020,,Sri Lanka,08/02/2020 03:43,1,0,1
631,631,15/02/2020,,Sweden,01/02/2020 02:13,1,0,0
632,632,16/02/2020,Diamond Princess cruise ship,Others,16/02/2020 03:43,355,0,0


In [7]:
df = pd.read_csv("covid19_dataset.csv", names=['a','b','c','d','e','f','h'])
df.head()

Unnamed: 0,a,b,c,d,e,f,h
Serial Number,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
1,22/01/2020,,Japan,1/22/2020 12:00,2,0,0
2,22/01/2020,,Thailand,1/22/2020 12:00,2,0,0
3,22/01/2020,,South Korea,1/22/2020 12:00,1,0,0
4,23/01/2020,Anhui,Mainland China,1/23/2020 12:00,9,0,0


#### column to be the index of the returned DataFrame

In [8]:
df = pd.read_csv("covid19_dataset.csv", index_col='Date')
df.head()

Unnamed: 0_level_0,Serial Number,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
22/01/2020,1,,Japan,1/22/2020 12:00,2,0,0
22/01/2020,2,,Thailand,1/22/2020 12:00,2,0,0
22/01/2020,3,,South Korea,1/22/2020 12:00,1,0,0
23/01/2020,4,Anhui,Mainland China,1/23/2020 12:00,9,0,0
23/01/2020,5,Hong Kong,Hong Kong,1/23/2020 12:00,2,0,0


#### you can skip the first, third, and fourth rows of a file with skiprows:

In [9]:
df = pd.read_csv("covid19_dataset.csv", skiprows=[0,2,3])
df.head()

Unnamed: 0,1,22/01/2020,Unnamed: 2,Japan,1/22/2020 12:00,2,0,0.1
0,4,23/01/2020,Anhui,Mainland China,1/23/2020 12:00,9,0,0
1,5,23/01/2020,Hong Kong,Hong Kong,1/23/2020 12:00,2,0,0
2,6,23/01/2020,Macau,Macau,1/23/2020 12:00,2,0,0
3,7,23/01/2020,Taiwan,Taiwan,1/23/2020 12:00,1,0,0
4,8,23/01/2020,,Japan,1/23/2020 12:00,1,0,0


#### The na_values option can take either a list or set of strings to consider missingvalues:

In [11]:
df = pd.read_csv("covid19_dataset.csv")
df.head()

Unnamed: 0,Serial Number,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,22/01/2020,NU,Japan,1/22/2020 12:00,2,0,0
1,2,22/01/2020,NU,NU,1/22/2020 12:00,NU,0,0
2,3,NU,NU,South Korea,1/22/2020 12:00,1,0,0
3,4,23/01/2020,Anhui,Mainland China,1/23/2020 12:00,9,0,0
4,5,23/01/2020,Hong Kong,Hong Kong,1/23/2020 12:00,2,0,0


In [12]:
df = pd.read_csv("covid19_dataset.csv", na_values=['NU'])
df.head()

Unnamed: 0,Serial Number,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,22/01/2020,,Japan,1/22/2020 12:00,2.0,0,0
1,2,22/01/2020,,,1/22/2020 12:00,,0,0
2,3,,,South Korea,1/22/2020 12:00,1.0,0,0
3,4,23/01/2020,Anhui,Mainland China,1/23/2020 12:00,9.0,0,0
4,5,23/01/2020,Hong Kong,Hong Kong,1/23/2020 12:00,2.0,0,0


##### If you want to only read a small number of rows (avoiding reading the entire file),specify that with nrows:

In [11]:
pd.read_csv('covid19_dataset.csv', nrows=3)


Unnamed: 0,Serial Number,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,22/01/2020,NU,Japan,1/22/2020 12:00,2,0,0
1,2,22/01/2020,NU,NU,1/22/2020 12:00,NU,0,0
2,3,NU,NU,South Korea,1/22/2020 12:00,1,0,0


# Writing Data to Text Format
**can also be exported to a delimited format**

In [17]:
df.to_csv('out.csv')


In [18]:
df.to_csv('out_1.csv', index=False, header=False)


# Reading Microsoft Excel Files


In [14]:
xlsx = pd.read_excel('work.xlsx','Sheet1', index_col='CustomerID')
xlsx.head()

Unnamed: 0_level_0,Gender,Age,Annual Income (k$),Spending Score (1-100)
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Male,19,15,39
2,Male,21,15,81
3,Female,20,16,6
4,Female,23,16,77
5,Female,31,17,40
