# Input Output - CSV files
----

**How to read from and write to a variety of data sources:**

https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html

----

In [1]:
import pandas as pd

In [23]:
# Load the CSV with default settings (no column is used as the row index) and display it
df = pd.read_csv('example.csv')
df

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [7]:
# Row labels of the DataFrame
df.index

RangeIndex(start=0, stop=4, step=1)

In [32]:
# Column labels of the DataFrame
df.columns

Index(['a', 'b', 'c', 'd'], dtype='object')

----

In [37]:
# making the column at position 0 (the first column, 'a') the labeled index

df = pd.read_csv('example.csv',index_col=0)
df

Unnamed: 0_level_0,b,c,d
a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,1,2,3
4,5,6,7
8,9,10,11
12,13,14,15


In [38]:
# dropping the row whose index label is 0 (drop works on labels, not positions);
# with column 'a' as the index, label 0 happens to be the first row
# NOTE: this cell reassigns df, so re-running it operates on the already-trimmed
# frame, which no longer contains label 0

df = df.drop(0)
df

Unnamed: 0_level_0,b,c,d
a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,5,6,7
8,9,10,11
12,13,14,15


----

In [44]:
# saving the updated dataframe

# PARAMETER index=True saves the labeled index as a column in the file
# DEFAULT: index=True

df.to_csv('example_updated.csv',index=True)
# reading the file back to check what was written
pd.read_csv('example_updated.csv')

Unnamed: 0,a,b,c,d
0,4,5,6,7
1,8,9,10,11
2,12,13,14,15


In [43]:
# PARAMETER index=False does not save the labeled index;
# here the index holds the values of column 'a', so they are missing from the file

df.to_csv('example_updated.csv',index=False)
# reading the file back to check what was written
pd.read_csv('example_updated.csv')

Unnamed: 0,b,c,d
0,5,6,7
1,9,10,11
2,13,14,15


----

# Input Output - Excel files

<br>

- Pandas can only read and write raw data. It cannot read in macros, visualisations or formulas.
<br>

- Pandas treats an excel workbook as a dictionary. 
    - Key = sheet name
    - Value = DataFrame representing the sheet  
<br>
<br>

- Extra libraries required: openpyxl, xlrd
    - Pandas uses these as its Excel engines (openpyxl for `.xlsx` files, xlrd for legacy `.xls` files)

In [55]:
# To read a particular sheet by name
# DEFAULT: sheet_name=0, which reads the first sheet

df = pd.read_excel('my_excel_file.xlsx',sheet_name='Second_Sheet')
df

Unnamed: 0,I,II,III
0,1,2,3
1,4,5,6
2,7,8,9


----

In [49]:
# To open the entire workbook as an ExcelFile object
# (no sheet is selected here; wb is used below to list the sheet names)

wb = pd.ExcelFile('my_excel_file.xlsx')

In [52]:
# Returns the list of sheet names in the workbook

wb.sheet_names

['First_Sheet', 'Second_Sheet']

In [56]:
# Reading the first sheet through the ExcelFile object opened above,
# so the workbook is not opened and parsed from its path a second time

pd.read_excel(wb,sheet_name=wb.sheet_names[0])

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


----

In [57]:
# READING THE ENTIRE WORKBOOK WITHOUT GRABBING ANY SHEET

excel_sheet_dict = pd.read_excel('my_excel_file.xlsx',sheet_name=None)

# setting sheet_name=None returns a dictionary of the workbook
# key = sheet name
# value = DataFrame holding that sheet's data

In [60]:
# Display the whole dictionary: one DataFrame per sheet name
excel_sheet_dict

{'First_Sheet':     a   b   c   d
 0   0   1   2   3
 1   4   5   6   7
 2   8   9  10  11
 3  12  13  14  15,
 'Second_Sheet':    I  II  III
 0  1   2    3
 1  4   5    6
 2  7   8    9}

In [62]:
# Confirm that the workbook was returned as a plain Python dict
type(excel_sheet_dict)

dict

In [64]:
# The dictionary keys are the sheet names
excel_sheet_dict.keys()

dict_keys(['First_Sheet', 'Second_Sheet'])

In [66]:
# GRABBING THE FIRST SHEET FROM THE DICTIONARY (WORKBOOK)
# lookup is by sheet name, which is the dictionary key

mydf = excel_sheet_dict['First_Sheet']
mydf

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


----

### Writing to an excel file
----

In [68]:
# Write mydf to a sheet named 'sample'; index is not specified here,
# so the row index is written to the file as an extra first column
mydf.to_excel('example.xlsx',sheet_name = 'sample')

In [69]:
# Read the file back: the saved index comes back as an extra 'Unnamed: 0' column
pd.read_excel('example.xlsx')

Unnamed: 0.1,Unnamed: 0,a,b,c,d
0,0,0,1,2,3
1,1,4,5,6,7
2,2,8,9,10,11
3,3,12,13,14,15


In [70]:
# rewriting the same file with index=False so the row index is not saved

mydf.to_excel('example.xlsx',sheet_name = 'sample',index=False)
# reading the file back: only the data columns a, b, c, d remain
pd.read_excel('example.xlsx')

Unnamed: 0,a,b,c,d
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15
