# Pandas: Importing and Exporting

In [1]:
import os

## IMPORTING AND EXPORTING DELIMITED FILES


### Comma separated values (CSV) files

A file that is separated (or delimited) by commas (or anything else)

Example:
    
**-- comma separated file --**

    name,year,value
    Andre,2020,100
    Fernanda,1900,1
    
**-- tab separated file --**

    name    year    value
    Andre    2020    100
    Fernanda    1900    1
    
**-- tab separated file (another way) --**

    name\tyear\tvalue
    Andre\t2020\t100
    Fernanda\t1900\t1
    
**-- hash separated file --**

    name#year#value
    Andre#2020#100
    Fernanda#1900#1

**-- pipe separated file --**

    name|year|value
    Andre|2020|100
    Fernanda|1900|1

    ...

### Import comma-separated variable file (the default)

In [2]:
import pandas as pd

In [5]:
pwd

'D:\\[DATA_PYTHON]\\( 2020 JUN DATA FT )\\Week 02_Day2_b_Importing and Exporting Data'

In [6]:
pd.read_csv('data/vehicles')

FileNotFoundError: [Errno 2] File data/vehicles does not exist: 'data/vehicles'

In [7]:
df = pd.read_csv('../data/vehicles.csv')

FileNotFoundError: [Errno 2] File ../data/vehicles.csv does not exist: '../data/vehicles.csv'

In [25]:
df.head()

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.4375,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.4375,2550


In [26]:
df.head(2)

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550


In [6]:
df.describe()

Unnamed: 0,Year,Engine Displacement,Cylinders,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
count,35952.0,35952.0,35952.0,35952.0,35952.0,35952.0,35952.0,35952.0,35952.0
mean,2000.7164,3.338493,5.765076,17.609056,17.646139,23.880646,19.929322,475.316339,1892.598465
std,10.08529,1.359395,1.755268,4.467283,4.769349,5.890876,5.112409,119.060773,506.958627
min,1984.0,0.6,2.0,0.06,6.0,9.0,7.0,37.0,600.0
25%,1991.0,2.2,4.0,14.699423,15.0,20.0,16.0,395.0,1500.0
50%,2001.0,3.0,6.0,17.347895,17.0,24.0,19.0,467.736842,1850.0
75%,2010.0,4.3,6.0,20.600625,20.0,27.0,23.0,555.4375,2200.0
max,2017.0,8.4,16.0,47.087143,58.0,61.0,56.0,1269.571429,5800.0


In [7]:
!pwd

/c/Users/andreaguiar/Desktop/usr/dev/ironhack/ft202006/classes/week2/4_Pandas_Import_Export


In [None]:
# absolute path
pd.read_csv('/Users/andreaguiar/Desktop/usr/dev/ironhack/ft202002/classes/week2/4_Pandas_Import_Export')

In [None]:
# relative path
pd.read_csv('data/vehicles.csv')


In this way, the `pd.read_csv()` method is just outputing the results on screen.

If I wanted to store it in a variable I would have to **assign** this result into a variable

In [10]:
vehicles = pd.read_csv('data/vehicles.csv')

## Import tab-delimited file


In [11]:
pd.read_csv('data/vehicles_tab.txt', sep='\t')

Unnamed: 0,Make,Model,Year,Engine Displacement,Cylinders,Transmission,Drivetrain,Vehicle Class,Fuel Type,Fuel Barrels/Year,City MPG,Highway MPG,Combined MPG,CO2 Emission Grams/Mile,Fuel Cost/Year
0,AM General,DJ Po Vehicle 2WD,1984,2.5,4.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,19.388824,18,17,17,522.764706,1950
1,AM General,FJ8c Post Office,1984,4.2,6.0,Automatic 3-spd,2-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
2,AM General,Post Office DJ5 2WD,1985,2.5,4.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,20.600625,16,17,16,555.437500,2100
3,AM General,Post Office DJ8 2WD,1985,4.2,6.0,Automatic 3-spd,Rear-Wheel Drive,Special Purpose Vehicle 2WD,Regular,25.354615,13,13,13,683.615385,2550
4,ASC Incorporated,GNX,1987,3.8,6.0,Automatic 4-spd,Rear-Wheel Drive,Midsize Cars,Premium,20.600625,14,21,16,555.437500,2550
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35947,smart,fortwo coupe,2013,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35948,smart,fortwo coupe,2014,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,243.000000,1100
35949,smart,fortwo coupe,2015,1.0,3.0,Auto(AM5),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,38,36,244.000000,1100
35950,smart,fortwo coupe,2016,0.9,3.0,Auto(AM6),Rear-Wheel Drive,Two Seaters,Premium,9.155833,34,39,36,246.000000,1100


In [12]:
print('\tb\n\ta')

	b
	a


In [None]:
pd.read_csv('data/vehicles_tab.txt', sep='\t')

## Import pipe delimited file 

In [None]:
df = pd.read_csv('data/vehicles_pipe.txt', sep='|')
df.head()

In [None]:
# hash
df = pd.read_csv('data/vehicles_hash.txt', sep='#')
df.head()

# Importing Excel FIles

In [None]:
pd.read_excel('data/vehicles.xlsx')

In [None]:
aldrey_treated = pd.read_excel('data/dados_artigo1.xlsx', header=1, skiprows=[2,3], usecols='B:M')

In [None]:
aldrey_treated

## Read from specific excel sheet

In [None]:
df = pd.read_excel('data/dados_artigo1.xlsx', sheet_name='dataset')
df.head()

# Exporting datasets

## Export TO EXCEL

In [None]:
aldrey_treated.to_excel('data/aldrey_treated.xlsx', sheet_name='treated')

In [None]:
test = aldrey_treated*10

In [None]:
# this overrides the file
test.to_excel('data/aldrey_treated.xlsx', sheet_name='treated_10')

## BONUS: Saving to multiple sheets

In [None]:
writer = pd.ExcelWriter('data/aldrey_treated2.xlsx', engine = 'openpyxl')

In [None]:
aldrey_treated.to_excel(writer, sheet_name='treated')
test.to_excel(writer, sheet_name='treated_10')

In [None]:
writer.save()
writer.close()

## Export TO CSV

In [None]:
df.head()

In [None]:
df.to_csv('data/exported_file_v1.csv', index=False)

In [None]:
pd.read_csv('data/exported_file_v1.csv')

In [None]:
# pd.read_csv('data/exported_file_v2.csv', index_col=0)

**NOTE**: If you do not specify the argument `index=False`, the output in the csv file will create an unnamed index column with the *dataframe indexes*.

In [None]:
df.to_csv('data/exported_file.csv', index=False)

## Export to csv using a specific separator

### Semicolon

In [None]:
df.to_csv('data/exported_file_semicolon.csv', index=False, sep=';')

### Pipe

In [None]:
df.to_csv('data/exported_file_pipe.csv', index=False, sep='|')

### plust or minus

In [None]:
df.to_csv('data/exported_file_plusminus.csv', index=False, sep='±')

# Import and export JSON files

What is a JSON file?

JSON 1:
```json
{ "name":"John", "age":30, "car":null }
```

JSON 2: 
```json
{"students":[
   {"name":"Andre", "age":23, "state":"SP"},
   {"name":"Rafael", "age":28, "state":"RJ"},
   {"name":"Claudia", "age":32, "state":"PA"},
   {"name":"Lajos", "age":28, "state":"MA"}
]}
```

In [None]:
pd.read_json('data/vehicles.json')

In [None]:
data.head()

For a JSON file, you can have different `orient` options.

`'split'`: Dictionary containing indexes, columns, and data.

`'index'`: Nested dictionaries containing {index:{column:value}}.

`'columns'`: Nested dictionaries containing {column:{index:value}}

`'values'`: Nested list where each sublist contains the values for a record.

`'table'`: Nested dictionaries containing schema and data (records).

In [None]:
data.sample(10)

In [None]:
data.sample(10).to_json('data/vehicles_split.json', orient='split')
data.sample(10).to_json('data/vehicles_index.json', orient='index')
data.sample(10).to_json('data/vehicles_columns.json', orient='columns')
data.sample(10).to_json('data/vehicles_values.json', orient='values')
data.sample(10).to_json('data/vehicles_table.json', orient='table')


In [None]:
pd.read_json('data/vehicles_values.json', orient='values')

In [None]:
data.sample(10)

## Encoding

In [13]:
df = data.head(12)

NameError: name 'data' is not defined

In [None]:
#df.to_csv('data/teste.csv')

In [19]:
pd.read_csv('data/CSV.8859-1.csv')

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe1 in position 1: invalid continuation byte

In [18]:
pd.read_csv('data/CSV.8859-1.csv', encoding='latin-1', sep=';').sample(20)

Unnamed: 0,Data,Projeto(s),Ini,Ter,Horas,Atividade,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
90,"ter, 28/01",RS,16:00,16:30,00:30,Automatização da extração Inova,,,,
76,"qui, 23/01",PRODESP,14:00,15:30,01:30,Sistemas APESP,,,,
70,"qua, 22/01",PRODESP,13:30,16:00,02:30,Sistemas APESP,,,,
43,"qua, 15/01",PRODESP,11:30,12:00,00:30,Sistema APESP - ambiente Maurício,,,,
61,"seg, 20/01",RS,13:30,15:00,01:30,Extração Inova,,,,
65,"ter, 21/01",GTE,12:30,13:30,01:00,Atividades administrativas,,,,
24,"qui, 09/01",RS,13:30,14:30,01:00,Bug na extação Inova,,,,
16,"ter, 07/01",GTE - Orçamento,15:30,17:30,02:00,Estudo para módulo orçamentário,,,,
83,"dom, 26/01",PRODESP,14:00,17:00,03:00,Sistemas APESP,,,,
82,"sex, 24/01",PRODESP,15:30,18:00,02:30,Sistemas APESP,,,,


In [17]:
pd.read_csv('data/CSV.UTF8.csv', sep=';').sample(20)

Unnamed: 0,Data,Projeto(s),Ini,Ter,Horas,Atividade,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9
79,"sex, 24/01",Paulistana,10:00,11:00,01:00,Relatório de monitoramento,,,,
58,"dom, 19/01",PRODESP,02:30,12:00,09:30,Sistemas APESP,,,,
14,"ter, 07/01",GTE - Git,13:30,14:00,00:30,Workshop Git,,,,
83,"dom, 26/01",PRODESP,14:00,17:00,03:00,Sistemas APESP,,,,
75,"qui, 23/01",RS,13:30,14:00,00:30,Sobre a extração da terceira edição,,,,
37,"ter, 14/01",Paulistana,13:30,14:00,00:30,Relatório de monitoramento,,,,
3,"qui, 02/01",GTE - FC,14:00,15:00,01:00,Troubleshooting Contrato X Rubricas,,,,
64,"ter, 21/01",PRODESP,11:00,11:30,00:30,Sistemas APESP,,,,
71,"qua, 22/01",Paulistana,16:00,16:30,00:30,Atualização de ambiente,,,,
2,"qui, 02/01",GTE,13:30,14:00,00:30,Tarefas administrativas,,,,


# Importing files from URLs

Example: https://github.com/rfordatascience/tidytuesday

In [22]:
pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-09/firsts.csv')

Unnamed: 0,year,accomplishment,person,gender,category
0,1738,First free African-American community,Gracia Real de Santa Teresa de Mose (later nam...,African-American Firsts,Social & Jobs
1,1760,First known African-American published author,Jupiter Hammon (poem An Evening Thought,Female African American Firsts,Arts & Entertainment
2,1768,First known African-American to be elected to ...,"Wentworth Cheswell, town constable in Newmarke...",African-American Firsts,Social & Jobs
3,1773,First known African-American woman to publish ...,"Phillis Wheatley (Poems on Various Subjects, R...",Female African American Firsts,Arts & Entertainment
4,1773,First separate African-American church,"Silver Bluff Baptist Church, Aiken County, Sou...",African-American Firsts,Religion
...,...,...,...,...,...
474,2019,First African-American to win an Academy Award...,Hannah Beachler for Black Panther [253],African-American Firsts,Arts & Entertainment
475,2019,First member of the British royal family of Af...,Archie Mountbatten-Windsor,African-American Firsts,Social & Jobs
476,2019,First African-American secretary of the Smiths...,Lonnie Bunch,African-American Firsts,Arts & Entertainment
477,2019,First African-American female director of an A...,Denise Verret[254],Female African American Firsts,Arts & Entertainment


In [23]:
af = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-09/firsts.csv')

In [24]:
af.head()

Unnamed: 0,year,accomplishment,person,gender,category
0,1738,First free African-American community,Gracia Real de Santa Teresa de Mose (later nam...,African-American Firsts,Social & Jobs
1,1760,First known African-American published author,Jupiter Hammon (poem An Evening Thought,Female African American Firsts,Arts & Entertainment
2,1768,First known African-American to be elected to ...,"Wentworth Cheswell, town constable in Newmarke...",African-American Firsts,Social & Jobs
3,1773,First known African-American woman to publish ...,"Phillis Wheatley (Poems on Various Subjects, R...",Female African American Firsts,Arts & Entertainment
4,1773,First separate African-American church,"Silver Bluff Baptist Church, Aiken County, Sou...",African-American Firsts,Religion
