# Example of loading CSV dataset

## Standard python library

In [16]:
#libreria csv ufficiale python https://docs.python.org/3.6/library/csv.html
from csv import reader

# Function of loading a CSV file
def load_csv(filename):
    dataset = list()
    with open(filename, 'r') as file:
        csv_reader = reader(file)
        for row in csv_reader:
            if not row:
                continue
            dataset.append(row)
    return dataset

In [17]:
# Convert string column to float
def str_column_to_float(dataset, column):
    for row in dataset:
        row[column] = float(row[column].strip())

In [22]:
# Convert string column to integer
def str_column_to_int(dataset, column):
    class_values = [row[column] for row in dataset]
    unique = set(class_values)
    lookup = dict()
    for i, value in enumerate(unique):
        lookup[value] = i
    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup

In [21]:
# Load dataset pima-indians-diabetes.csv - only numbers
filename = 'pima-indians-diabetes.csv'
dataset = load_csv(filename)
# reference pyformat https://pyformat.info/
print('Loaded data file {0} with {1} rows and {2} columns'.format(filename, len(dataset), len(dataset[0])))
print(dataset[0])
# convert string columns to float
for i in range(len(dataset[0])):
    str_column_to_float(dataset, i)
print(dataset[0])

Loaded data file pima-indians-diabetes.csv with 768 rows and 9 columns
['6', '148', '72', '35', '0', '33.6', '0.627', '50', '1']
[6.0, 148.0, 72.0, 35.0, 0.0, 33.6, 0.627, 50.0, 1.0]


In [23]:
# Load dataset iris.csv - numbers & string 
filename = 'iris.csv'
dataset = load_csv(filename)
print('Loaded data file {0} with {1} rows and {2} columns'.format(filename, len(dataset), len(dataset[0])))
print(dataset[0])
# convert string columns to float
for i in range(4):
    str_column_to_float(dataset, i)
# convert class column to int
lookup = str_column_to_int(dataset, 4)
print(dataset[0])
print(lookup)

Loaded data file iris.csv with 150 rows and 5 columns
['5.1', '3.5', '1.4', '0.2', 'Iris-setosa']
[5.1, 3.5, 1.4, 0.2, 1]
{'Iris-virginica': 0, 'Iris-setosa': 1, 'Iris-versicolor': 2}


## Pandas library

In [43]:
# reference http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html
# Load dataset pima-indians-diabetes.csv - only numbers

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#filename = 'pima-indians-diabetes.csv'
filename = 'iris.csv'
dataset = pd.read_csv(filename, header=None)
dataset.head(5)

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


## Numpy library

In [44]:
# reference http://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html
# Load dataset iris.csv

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

filename = 'iris.csv'
dataset = np.loadtxt(filename,
   dtype={'names': ('sepal length', 'sepal width', 'petal length', 'petal width', 'label'),
          'formats': (np.float, np.float, np.float, np.float, '|S15')},
   delimiter=',', skiprows=0)

dataset

array([(5.1, 3.5, 1.4, 0.2, b'Iris-setosa'),
       (4.9, 3.0, 1.4, 0.2, b'Iris-setosa'),
       (4.7, 3.2, 1.3, 0.2, b'Iris-setosa'),
       (4.6, 3.1, 1.5, 0.2, b'Iris-setosa'),
       (5.0, 3.6, 1.4, 0.2, b'Iris-setosa'),
       (5.4, 3.9, 1.7, 0.4, b'Iris-setosa'),
       (4.6, 3.4, 1.4, 0.3, b'Iris-setosa'),
       (5.0, 3.4, 1.5, 0.2, b'Iris-setosa'),
       (4.4, 2.9, 1.4, 0.2, b'Iris-setosa'),
       (4.9, 3.1, 1.5, 0.1, b'Iris-setosa'),
       (5.4, 3.7, 1.5, 0.2, b'Iris-setosa'),
       (4.8, 3.4, 1.6, 0.2, b'Iris-setosa'),
       (4.8, 3.0, 1.4, 0.1, b'Iris-setosa'),
       (4.3, 3.0, 1.1, 0.1, b'Iris-setosa'),
       (5.8, 4.0, 1.2, 0.2, b'Iris-setosa'),
       (5.7, 4.4, 1.5, 0.4, b'Iris-setosa'),
       (5.4, 3.9, 1.3, 0.4, b'Iris-setosa'),
       (5.1, 3.5, 1.4, 0.3, b'Iris-setosa'),
       (5.7, 3.8, 1.7, 0.3, b'Iris-setosa'),
       (5.1, 3.8, 1.5, 0.3, b'Iris-setosa'),
       (5.4, 3.4, 1.7, 0.2, b'Iris-setosa'),
       (5.1, 3.7, 1.5, 0.4, b'Iris-setosa'),
       (4.