# Chapter 3: Data I/O

In [3]:
import pandas as pd
import numpy as np
import scipy.io

## The World of Data

In [5]:
# Load the MATLAB .mat file. The result is a dict holding the stored variables
# next to the '__header__', '__version__' and '__globals__' metadata entries.
mat = scipy.io.loadmat(
    file_name='Chapter3-Datasets/matlab.mat',
)
mat

{'__header__': b'MATLAB 5.0 MAT-file Platform: nt, Created on: Tue Feb  2 14:21:02 2021',
 '__version__': '1.0',
 '__globals__': [],
 'storage': array([[0.00000000e+00],
        [3.60020368e-04],
        [7.26299303e-04],
        ...,
        [1.36616373e-05],
        [1.35810556e-05],
        [1.36134929e-05]]),
 'T1': array([[475.5],
        [475.5],
        [475.4],
        ...,
        [476.8],
        [476.8],
        [476.8]]),
 'time': array([[10256548.8],
        [10256549. ],
        [10256549.2],
        ...,
        [10273672.4],
        [10273672.6],
        [10273672.8]]),
 'value': array([[10256548.8       ],
        [10256550.09106825],
        [10256550.31226313],
        ...,
        [10273670.63541315],
        [10273672.1572869 ],
        [10273672.87393071]])}

## Exploring Data Sources

## Text Files and Binary Files

In [8]:
# Parse the Excel workbook with the openpyxl engine, then display the frame.
dog_food_orders = pd.read_excel(
    io='Chapter3-Datasets/dog_food_orders.xlsx',
    engine='openpyxl',
)
dog_food_orders

Unnamed: 0,product,wholesale_price,msrp,qty_ordered,qty_shipped
0,skippys_dream,8.99,18.38,100,100
1,just_the_beef,4.99,10.43,200,195
2,potatos_and_lamb,5.19,11.43,50,50
3,turkey_and_cranberries,5.98,12.0,50,50
4,roasted_duck,9.59,17.48,15,15


## Online Data Sources

In [9]:
# read_html fetches the page and returns a list of DataFrames, one per parsed
# table. This is the live article, so the list can change between runs.
data_url = 'https://en.wikipedia.org/wiki/Wind_Power'
data = pd.read_html(io=data_url)
data

[                                                   0
 0                                Part of a series on
 1                                 Sustainable energy
 2  Energy conservation Arcology Building insulati...
 3  Renewable energy Biofuel Biogas Biomass Carbon...
 4  Sustainable transport Bicycle Cycle rickshaw E...
 5                  Renewable energy portal  Category
 6  .mw-parser-output .navbar{display:inline;font-...,
                  Wind farm  Capacity(MW)        Country  Refs
 0          Gansu Wind Farm          7965          China  [22]
 1      Muppandal wind farm          1500          India  [23]
 2  Alta (Oak Creek-Mojave)          1320  United States  [24]
 3      Jaisalmer Wind Park          1064          India  [25],
     0                                                  1
 0 NaN  Graphs are temporarily unavailable due to tech...,
                                                     0   1   2   3   4
 0   Number of countries with wind capacities in th... NaN NaN 

In [10]:
# Second entry in the list returned by read_html: the wind-farm capacity table.
data[1]

Unnamed: 0,Wind farm,Capacity(MW),Country,Refs
0,Gansu Wind Farm,7965,China,[22]
1,Muppandal wind farm,1500,India,[23]
2,Alta (Oak Creek-Mojave),1320,United States,[24]
3,Jaisalmer Wind Park,1064,India,[25]


#### Note: the live Wikipedia table is smaller now than it was when the book was published

In [16]:
# Positional lookup into the list of parsed tables. The index depends on the
# current layout of the live page, so it may need adjusting if the page changes.
data[13]

Unnamed: 0,vteApplication of wind energy,vteApplication of wind energy.1
0,Wind power,Wind turbine Wind mill
1,Vehicle propulsion,Sailboat Sailing ship Power kite SkySails Iceb...
2,Kite applications,Kite Human-lifting kite
3,Air current,Windcatcher


### Exercise 3.01 - reading data from web pages

In [22]:
# The URL carries an oldid parameter, i.e. it requests one fixed revision of
# the Solar power article rather than whatever the live page currently shows.
page_url = (
    'https://en.wikipedia.org/w/index.php?'
    'title=Solar_power&oldid=1022764142'
)
data = pd.read_html(io=page_url)
data

[                                                   0
 0                                Part of a series on
 1                                 Sustainable energy
 2  Energy conservation Arcology Building insulati...
 3  Renewable energy Biofuel Biogas Biomass Carbon...
 4  Sustainable transport Bicycle Cycle rickshaw E...
 5                  Renewable energy portal  Category
 6  .mw-parser-output .navbar{display:inline;font-...,
     Solar Electricity Generation                                \
                             Year                  Energy (TWh)   
 0                           2004                           2.6   
 1                           2005                           3.7   
 2                           2006                           5.0   
 3                           2007                           6.8   
 4                           2008                          11.4   
 5                           2009                          19.3   
 6                           20

In [23]:
# Third parsed table from the Solar power page: the list of solar parks with
# name, country, capacity, generation, size, year and reference columns.
solar_PV_data = data[2]
solar_PV_data

Unnamed: 0,Name,Country,CapacityMWp,GenerationGWh p.a.,Sizekm2,.mw-parser-output .tooltip-dotted{border-bottom:1px dotted;cursor:help}Year,Ref
0,Pavagada Solar Park,India,2050,,53,2017,[2][53][54]
1,Tengger Desert Solar Park,China,1547,,43,2016,[55][56]
2,Bhadla Solar Park,India,1515,,40,2017,[57][58][59]
3,Kurnool Ultra Mega Solar Park,India,1000,,24,2017,[60]
4,Datong Solar Power Top Runner Base,China,1000,,,2016,[61][62][63]
5,Longyangxia Dam Solar Park,China,850,,23,2015,[64][65][66][67][68]
6,Rewa Ultra Mega Solar,India,750,,,2018,[69]
7,Kamuthi Solar Power Project,India,648,,10.1,2016,[70][71]
8,Solar Star (I and II),United States,579,1664.0,13,2015,[72][73]
9,Topaz Solar Farm,United States,550,1301.0,24.6[74],2014,[75][76][77]


## Fundamental Formats

### Text Data

In [17]:
# Plain comma-separated file: the read_csv defaults are sufficient.
pd.read_csv(
    filepath_or_buffer='Chapter3-Datasets/bike_share.csv',
)

Unnamed: 0,dteday,hr,cnt
0,1/1/2011,0,16
1,1/1/2011,1,40
2,1/1/2011,2,32
3,1/1/2011,3,13
4,1/1/2011,4,1
...,...,...,...
17374,12/31/2012,19,119
17375,12/31/2012,20,89
17376,12/31/2012,21,90
17377,12/31/2012,22,61


In [20]:
# Same bike-share data stored as UTF-16 little-endian text, so the encoding
# has to be given explicitly.
pd.read_csv(
    filepath_or_buffer='Chapter3-Datasets/bike_share_UCS_2_LE_BOM.csv',
    encoding='utf_16_le',
)

Unnamed: 0,dteday,hr,cnt
0,1/1/2011,0,16
1,1/1/2011,1,40
2,1/1/2011,2,32
3,1/1/2011,3,13
4,1/1/2011,4,1
...,...,...,...
17374,12/31/2012,19,119
17375,12/31/2012,20,89
17376,12/31/2012,21,90
17377,12/31/2012,22,61


In [21]:
# UTF-16 little-endian text with tab-separated fields: both the encoding and
# the separator must be overridden.
pd.read_csv(
    filepath_or_buffer='Chapter3-Datasets/bike_share_UCS_2_LE_BOM.tsv',
    encoding='utf_16_le',
    sep='\t',
)

Unnamed: 0,dteday,hr,cnt
0,1/1/2011,0,16
1,1/1/2011,1,40
2,1/1/2011,2,32
3,1/1/2011,3,13
4,1/1/2011,4,1
...,...,...,...
17374,12/31/2012,19,119
17375,12/31/2012,20,89
17376,12/31/2012,21,90
17377,12/31/2012,22,61


### Exercise 3.02 - text character encoding and data separators

In [26]:
# Exercise 3.02: the thyroid file is tab-separated UTF-16 little-endian text.
data = pd.read_csv(
    filepath_or_buffer='Chapter3-Datasets/thyroid.tsv',
    encoding='utf_16_le',
    sep='\t',
)
# Preview only the first rows; the frame is wide.
data.head()

Unnamed: 0,age,sex,on_thyroxine,query_on_thyroxine,on_antithyroid_medication,sick,pregnant,thyroid_surgery,I131_treatment,query_hypothyroid,...,TT4,T4U_measured,T4U,FTI_measured,FTI,TBG_measured,TBG,referral_source,result,value
0,41,F,f,f,f,f,f,f,f,f,...,125,t,1.14,t,109,f,?,SVHC,negative.,3733
1,23,F,f,f,f,f,f,f,f,f,...,102,f,?,f,?,f,?,other,negative.,1442
2,46,M,f,f,f,f,f,f,f,f,...,109,t,0.91,t,120,f,?,other,negative.,2965
3,70,F,t,f,f,f,f,f,f,f,...,175,f,?,f,?,f,?,other,negative.,806
4,70,F,f,f,f,f,f,f,f,f,...,61,t,0.87,t,70,f,?,SVI,negative.,2807


## Binary Data

### Databases - SQL data

In [27]:
import sqlite3

In [29]:
# Open (or create) the SQLite database file. The connection and cursor are
# left open so they remain available for further SQL against this database.
conn = sqlite3.connect('Chapter3-Datasets/bike_share.db')
c = conn.cursor()

# Source data: tab-separated, UTF-16 little-endian text.
data = pd.read_csv(
    'Chapter3-Datasets/bike_share_UCS_2_LE_BOM.tsv',
    encoding='utf_16_le',
    sep='\t',
)

# No hand-written CREATE TABLE here: with if_exists='replace', to_sql drops any
# existing RENTALS table and recreates it from the DataFrame's own columns
# (plus the index), so a manually declared (Date, Hour, Qty) schema would be
# discarded immediately and only mislead the reader about the column names.
# The call's return value (the number of rows written) is the cell output.
data.to_sql("RENTALS", conn, if_exists='replace')

17379