# Saving, Reading Files and Models

In [56]:
# For this notebook, we will just be using numpy, pandas, and scikit learn.
#The goal is to learn to manipulate data, and save and read from files in different ways.
import numpy as np
import pandas as pd
import sklearn

In [57]:
array = np.random.rand(500,10)
print(array.shape)
array

(500, 10)


array([[0.67746718, 0.5096883 , 0.70758998, ..., 0.46266417, 0.42348991,
        0.32474884],
       [0.44216742, 0.44609224, 0.32901458, ..., 0.30540781, 0.72220629,
        0.54469237],
       [0.08059496, 0.90392876, 0.29211787, ..., 0.24548088, 0.87886185,
        0.41578092],
       ...,
       [0.62540083, 0.79654494, 0.62508658, ..., 0.58520341, 0.94427071,
        0.84361504],
       [0.68516112, 0.93620323, 0.31259907, ..., 0.19049685, 0.00595479,
        0.53009262],
       [0.32973431, 0.88376546, 0.71805075, ..., 0.52903615, 0.33732599,
        0.56184093]])

In [58]:
df = pd.DataFrame(array)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.677467,0.509688,0.707590,0.057028,0.626685,0.137019,0.110339,0.462664,0.423490,0.324749
1,0.442167,0.446092,0.329015,0.205422,0.754569,0.945691,0.637116,0.305408,0.722206,0.544692
2,0.080595,0.903929,0.292118,0.594333,0.990000,0.458197,0.682490,0.245481,0.878862,0.415781
3,0.478495,0.756760,0.546258,0.389027,0.065202,0.551556,0.846113,0.143062,0.217625,0.690052
4,0.046914,0.156225,0.244266,0.595211,0.241328,0.860169,0.792456,0.131174,0.182643,0.866740
5,0.810560,0.518907,0.647678,0.336709,0.190755,0.617081,0.826219,0.135302,0.191167,0.242359
6,0.093540,0.810438,0.251637,0.754182,0.415910,0.315851,0.636005,0.647768,0.423704,0.731750
7,0.708361,0.377819,0.596926,0.145909,0.752150,0.819567,0.941203,0.500256,0.245274,0.965423
8,0.574685,0.918109,0.064504,0.911016,0.837742,0.579198,0.655209,0.318193,0.094880,0.432543
9,0.867086,0.420383,0.755501,0.208065,0.522503,0.240784,0.705626,0.895582,0.373973,0.672556


# Save numpy array directly to file

In [70]:
np.save(arr = array,file= 'foo',allow_pickle=True)

In [73]:
ls

 [0m[01;34mData[0m/               Intro.ipynb            Perceptron.ipynb
 Day1.ipynb          KNN.ipynb              Preprocessing.ipynb
 Day2.ipynb          LibrosaTut.ipynb       ReadMe.md
 Day3_2.ipynb        [01;34mLongfiles[0m/             Scikit-learn.ipynb
 Day3.ipynb         'Main Notebook.ipynb'   SciPy.ipynb
 Day3_oldest.ipynb   Matplotlib.ipynb       SVM.ipynb
 Day4.ipynb          MIDI.ipynb            'Torch Intro.ipynb'
 Day5b.ipynb         NumPy.ipynb           [01;34m'Untitled Folder'[0m/
 foo.npy             Pandas.ipynb


In [74]:
np.fromfile('./foo.npy')

array([1.87585069e-309, 1.17119999e+171, 5.22741680e-037, ...,
       5.29036152e-001, 3.37325994e-001, 5.61840928e-001])

#### 

# Save Pandas dataframe and read csv, tsv, excel files

In [61]:
# write to a csv file
df.to_csv('foo.csv', index=False)

In [62]:
# list files in the current working directory

In [63]:
ls

 [0m[01;34mData[0m/               foo.npy                Pandas.ipynb
 Day1.ipynb          Intro.ipynb            Perceptron.ipynb
 Day2.ipynb          KNN.ipynb              Preprocessing.ipynb
 Day3_2.ipynb        LibrosaTut.ipynb       ReadMe.md
 Day3.ipynb          [01;34mLongfiles[0m/             Scikit-learn.ipynb
 Day3_oldest.ipynb  'Main Notebook.ipynb'   SciPy.ipynb
 Day4.ipynb          Matplotlib.ipynb       SVM.ipynb
 Day5b.ipynb         MIDI.ipynb            'Torch Intro.ipynb'
 foo.csv             NumPy.ipynb           [01;34m'Untitled Folder'[0m/


In [64]:
# read file back in
path = './foo.csv'
newDf = pd.read_csv(path)
newDf.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.677467,0.509688,0.70759,0.057028,0.626685,0.137019,0.110339,0.462664,0.42349,0.324749
1,0.442167,0.446092,0.329015,0.205422,0.754569,0.945691,0.637116,0.305408,0.722206,0.544692
2,0.080595,0.903929,0.292118,0.594333,0.99,0.458197,0.68249,0.245481,0.878862,0.415781
3,0.478495,0.75676,0.546258,0.389027,0.065202,0.551556,0.846113,0.143062,0.217625,0.690052
4,0.046914,0.156225,0.244266,0.595211,0.241328,0.860169,0.792456,0.131174,0.182643,0.86674


In [65]:
# remove the file
import os
os.remove(path)

In [66]:
# can also do Excel
df.to_excel('foo.xlsx', sheet_name='Sheet1')

In [67]:
newDf2 = pd.read_excel('foo.xlsx', 'Sheet1', index_col=None, na_values=['NA'])
newDf2.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,0.677467,0.509688,0.70759,0.057028,0.626685,0.137019,0.110339,0.462664,0.42349,0.324749
1,1,0.442167,0.446092,0.329015,0.205422,0.754569,0.945691,0.637116,0.305408,0.722206,0.544692
2,2,0.080595,0.903929,0.292118,0.594333,0.99,0.458197,0.68249,0.245481,0.878862,0.415781
3,3,0.478495,0.75676,0.546258,0.389027,0.065202,0.551556,0.846113,0.143062,0.217625,0.690052
4,4,0.046914,0.156225,0.244266,0.595211,0.241328,0.860169,0.792456,0.131174,0.182643,0.86674


In [68]:
os.remove('foo.xlsx')