# Data-IO

## Save data from a URL to a file

In [1]:
!curl https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data -o "./data/iris.csv"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:01 --:--:--     0
100  4551  100  4551    0     0   2240      0  0:00:02  0:00:02 --:--:--  2240


## Load data into dataframe

In [2]:
import pandas as pd

# Read the downloaded file into a DataFrame. The file has no header row, so
# header=None keeps the first record as data and numbers the columns instead.
df = pd.read_csv(
    "./data/iris.csv",
    header=None,
)
df.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Write the dataframe to a CSV or TSV file

In [3]:
# Both exports omit the header row and the index column, so the files contain
# only the data records. header=False is the documented way to suppress the
# header (the parameter accepts a bool or a list of column aliases).

# Write dataframe to csv (comma-separated)
df.to_csv("./data/another-iris.csv", header=False, index=False, sep=",", encoding="utf-8")

# Write dataframe to tsv (tab-separated)
df.to_csv("./data/iris.tsv", header=False, index=False, sep="\t", encoding="utf-8")

## Load the written CSV and TSV files back into a dataframe

In [4]:
# Read the comma-separated copy back in; header=None treats every line as data.
df = pd.read_csv(
    "./data/another-iris.csv",
    header=None,
)
df.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [5]:
# Read the tab-separated file into a DataFrame: fields are split on tabs and
# header=None treats every line as data.
df = pd.read_csv(
    "./data/iris.tsv",
    sep="\t",
    header=None,
)
df.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Add column headers while loading from a file (or afterwards, as in the next section)

In [6]:
# Supply the column labels at load time through `names`; header=None still
# tells pandas that the file itself contains no header line.
df = pd.read_csv(
    "./data/iris.tsv",
    sep="\t",
    header=None,
    names=[
        "SepalLengthCm",
        "SepalWidthCm",
        "PetalLengthCm",
        "PetalWidthCm",
        "Species",
    ],
)
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Add column headers to an already-loaded dataframe

In [7]:
# Load the header-less file first, then label the columns on the existing
# DataFrame by assigning to `df.columns`.
df = pd.read_csv("./data/iris.tsv", sep="\t", header=None)
df.columns = [
    "SepalLengthCm",
    "SepalWidthCm",
    "PetalLengthCm",
    "PetalWidthCm",
    "Species",
]
df.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [8]:
# Save the labelled DataFrame as a tab-separated file without the index column.
# No `header` argument is passed, so pandas' default applies and the column
# names are written as the first line.
df.to_csv(
    "./data/iris-with-header.tsv",
    sep="\t",
    index=False,
    encoding="utf-8",
)