In [1]:
import pandas as pd
import numpy as np

# Chapter 6

## Data Loading, Storage, and File Formats

## 6.1 Reading and Writing Data in Text Format

pandas has a number of functions for reading tabular data, but will most likely use read_csv and read_table

optional arguments for these functions include: indexing, type inference and data conversion, datetime parsing, iterating, and unclean data issues

handling dates and other custom types can require extra effort. start with a small comma-separated (CSV) file:

In [126]:
!cat examples/ex1.csv

a,b,c,d,message
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo

In [127]:
df = pd.read_csv('examples/ex1.csv')

In [128]:
df

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [129]:
pd.read_table('examples/ex1.csv', sep=',') #used read-table and specified the deliminator

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [130]:
!cat examples/ex2.csv #file will not always have a header row

1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo

In [131]:
pd.read_csv('examples/ex2.csv', header=None) #allow pandas to assign default column names

Unnamed: 0,0,1,2,3,4
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [132]:
pd.read_csv('examples/ex2.csv', names=['a', 'b', 'c', 'd', 'message']) #assign column names

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [133]:
names = ['a', 'b', 'c', 'd', 'message']

In [134]:
pd.read_csv('examples/ex2.csv', names=names, index_col='message') #setting message column to be the index

Unnamed: 0_level_0,a,b,c,d
message,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
hello,1,2,3,4
world,5,6,7,8
foo,9,10,11,12


if you want to form a hierarchical index from multiple columns, pass a list of column numbers or names:

In [135]:
!cat examples/csv_mindex.csv

key1,key2,value1,value2
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16


In [136]:
parsed = pd.read_csv('examples/csv_mindex.csv', index_col=['key1', 'key2'])

In [137]:
parsed

Unnamed: 0_level_0,Unnamed: 1_level_0,value1,value2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16


in some cases a table might not have a fixed delimiter, using whitespace or some other pattern to seaparate fields. for example:


In [138]:
list(open('examples/ex3.txt'))

['            A         B         C\n',
 'aaa -0.264438 -1.026059 -0.619500\n',
 'bbb  0.927272  0.302904 -0.032399\n',
 'ccc -0.264273 -0.386314 -0.217601\n',
 'ddd -0.871858 -0.348382  1.100491\n']

the fields here are separated by a variable amount of whitespace. in these cases, you can pass a regular expression as a delimiter for read-table. use the reg:ex \s+

In [139]:
result = pd.read_table('examples/ex3.txt', sep='\s+')

In [140]:
result

Unnamed: 0,A,B,C
aaa,-0.264438,-1.026059,-0.6195
bbb,0.927272,0.302904,-0.032399
ccc,-0.264273,-0.386314,-0.217601
ddd,-0.871858,-0.348382,1.100491


In [141]:
!cat examples/ex4.csv


# hey!
a,b,c,d,message
# just wanted to make things more difficult for you
# who reads CSV files with computers, anyway?
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo

In [142]:
pd.read_csv('examples/ex4.csv', skiprows=[0, 2, 3]) #skipping rows

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


handling missing values- by default, pandas uses a set of commonly occuring sentinels such as NA and NULL

In [143]:
!cat examples/ex5.csv

something,a,b,c,d,message
one,1,2,3,4,NA
two,5,6,,8,world
three,9,10,11,12,foo

In [144]:
result = pd.read_csv('examples/ex5.csv')

In [145]:
result

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo


In [146]:
pd.isnull(result)

Unnamed: 0,something,a,b,c,d,message
0,False,False,False,False,False,True
1,False,False,False,True,False,False
2,False,False,False,False,False,False


the na_values option can take either a list or set of strings to consider missing values:

In [147]:
result = pd.read_csv('examples/ex5.csv', na_values=['Null'])

In [148]:
result

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo


In [149]:
sentinels = {'message': ['foo', 'NA'], 'something': ['two']} 

#differente NA sentinels can be specified for each column in a dict:

In [150]:
pd.read_csv('examples/ex5.csv', na_values=sentinels)

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,,5,6,,8,world
2,three,9,10,11.0,12,


### Reading Text Files in Pieces

when processing large files, may want to only read a small piece or iterate through a small piece first. 

In [151]:
pd.options.display.max_rows = 10 #make pandas display settings compact

In [152]:
result = pd.read_csv('examples/ex6.csv')

In [153]:
result

Unnamed: 0,one,two,three,four,key
0,0.467976,-0.038649,-0.295344,-1.824726,L
1,-0.358893,1.404453,0.704965,-0.200638,B
2,-0.501840,0.659254,-0.421691,-0.057688,G
3,0.204886,1.074134,1.388361,-0.982404,R
4,0.354628,-0.133116,0.283763,-0.837063,Q
...,...,...,...,...,...
9995,2.311896,-0.417070,-1.409599,-0.515821,L
9996,-0.479893,-0.650419,0.745152,-0.646038,E
9997,0.523331,0.787112,0.486066,1.093156,K
9998,-0.362559,0.598894,-1.843201,0.887292,G


In [154]:
pd.read_csv('examples/ex6.csv', nrows=5) #specify number of rows you want to read with nrows

Unnamed: 0,one,two,three,four,key
0,0.467976,-0.038649,-0.295344,-1.824726,L
1,-0.358893,1.404453,0.704965,-0.200638,B
2,-0.50184,0.659254,-0.421691,-0.057688,G
3,0.204886,1.074134,1.388361,-0.982404,R
4,0.354628,-0.133116,0.283763,-0.837063,Q


In [155]:
chunker = pd.read_csv('examples/ex6.csv', chunksize=1000) #read a file in pieces use chunksize as a number of rows

In [156]:
chunker

<pandas.io.parsers.readers.TextFileReader at 0x11a6f92a0>

the TextParser object returned by read_csv allows you to iterate over the parts of the file according to the chunksize. for example, we can iterate over ex6.csv, aggregating the value counts in the 'key' column:

In [157]:
chunker = pd.read_csv('examples/ex6.csv', chunksize =1000)

tot = pd.Series([])
for piece in chunker:
    tot = tot.add(piece['key'].value_counts(), fill_value=0)
    
tot = tot.sort_values(ascending=False)

  tot = pd.Series([])


In [158]:
tot[:10]

E    368.0
X    364.0
L    346.0
O    343.0
Q    340.0
M    338.0
J    337.0
F    335.0
K    334.0
H    330.0
dtype: float64

TextParser is also equipped with a get_chunk method

### Writing Data to Text Format

Data can be exported to a delimited format

In [159]:
data = pd.read_csv('examples/ex5.csv')

In [160]:
data

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo


In [161]:
data.to_csv('examples/out.csv') #using DataFrame's to_csv method to write the data out to a CSV file

In [162]:
!cat examples/out.csv

,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo


_writing to sys.stdout prints the text result to the console):_

In [163]:
import sys

In [164]:
data.to_csv(sys.stdout, sep='|')

|something|a|b|c|d|message
0|one|1|2|3.0|4|
1|two|5|6||8|world
2|three|9|10|11.0|12|foo


missing values appear as empty strings in the output. denote them by some other sentinel value:

In [165]:
data.to_csv(sys.stdout, na_rep='NULL')

,something,a,b,c,d,message
0,one,1,2,3.0,4,NULL
1,two,5,6,NULL,8,world
2,three,9,10,11.0,12,foo


In [166]:
data.to_csv(sys.stdout, index=False, header=False) #disabling row and column labels

one,1,2,3.0,4,
two,5,6,,8,world
three,9,10,11.0,12,foo


In [167]:
data.to_csv(sys.stdout, index=False, columns=['a', 'b', 'c']) #write only a subset of the columns

a,b,c
1,2,3.0
5,6,
9,10,11.0


In [168]:
dates = pd.date_range('1/1/2000', periods=7) #Series CSV Method here and below

In [169]:
ts = pd.Series(np.arange(7), index=dates)

In [170]:
ts.to_csv('examples/tseris.csv')

In [171]:
!cat examples/tseries.csv

2000-01-01,0
2000-01-02,1
2000-01-03,2
2000-01-04,3
2000-01-05,4
2000-01-06,5
2000-01-07,6


### Working with Delimited Formats

may need to do some manual processing on files with malformed lines that trip up read_table

In [172]:
!cat examples/ex7.csv

"a","b","c"
"1","2","3"
"1","2","3"


for any file with a single-character delimiter, you can use python's built-in csv module. pass any open file to csv.reader

In [173]:
import csv
f = open('examples/ex7.csv')

reader = csv.reader(f)

In [174]:
for line in reader:
    print(line) #iterating through reader yields tuples of values

['a', 'b', 'c']
['1', '2', '3']
['1', '2', '3']


In [175]:
with open('examples/ex7.csv') as f:
    lines = list(csv.reader(f)) #read the file into a list of lines

In [176]:
header, values = lines[0], lines[1:] #split the lines into the header line and the data lines

In [177]:
data_dict = {h: v for h, v in zip(header, zip(*values))} 
#create a dictionary of data columns using dict comprehension and zip(*values), which transposes rows & columns

In [178]:
data_dict

{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}

to define a new format with a different delimiter, string quoting convention, or line terminator, we define a simple subclass of csv.Dialect:

In [None]:
class my_dialect(csv.Dialect):
    lineterminator = '\n'
    delimiter = ';'
    quotechar = '"'
    quoting = csv.QUOTE_MINIMAL
    
reader = csv.reader(f, dialect=my_dialect)

In [None]:
reader = csv.reader(f, delimiter='|')

to write delmited files manually, you can use csv.writer

In [181]:
with open('mydata.csv', 'w')as f: 
    writer = csv.writer(f, dialect=my_dialect)
    writer.writerow(('one', 'two', 'three'))
    writer.writerow(('1', '2', '3'))
    writer.writerow(('4', '5', '6'))
    writer.writerow(('7', '8', '9'))

### JSON Data

JavaScript Object Notations (JSON) has become one of the standard formats for sending data by HTTP request

In [182]:
obj = """
{"name": "Wes",
"places_lived": ["United States", "Spain", "Germany"],
"pet": null,
"siblings": [{"name": "Scott", "age": 30, "pets": ["Zeus", "Zuko"]},
            {"name": "Katie", "age": 38, "pets": ["Sixes", "Stache", "Cisco"]}]
}
"""

**to convert a JSON string to Python form, use json.loads**

In [183]:
import json

In [184]:
result = json.loads(obj)

In [185]:
result

{'name': 'Wes',
 'places_lived': ['United States', 'Spain', 'Germany'],
 'pet': None,
 'siblings': [{'name': 'Scott', 'age': 30, 'pets': ['Zeus', 'Zuko']},
  {'name': 'Katie', 'age': 38, 'pets': ['Sixes', 'Stache', 'Cisco']}]}

**json.dumps converts a Python object back to JSON**

In [186]:
asjson = json.dumps(result)

In [187]:
siblings = pd.DataFrame(result['siblings'], columns=['name', 'age'])

In [188]:
siblings

Unnamed: 0,name,age
0,Scott,30
1,Katie,38


the pandas.read_json can automatically convert JSON datasets in specific arrangements into a Series or DataFrame

In [189]:
!cat examples/example.json

[{"a": 1, "b": 2, "c": 3},
 {"a": 4, "b": 5, "c": 6},
 {"a": 7, "b": 8, "c": 9}]


In [190]:
data = pd.read_json('examples/example.json') 
#default options for pandas.read_json assume that each object in the json array is a row in the table

In [191]:
data

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


If you need to export data from pandas to JSON, one way is to use the to_json methods on Series and DataFrame:

In [192]:
print(data.to_json)

<bound method NDFrame.to_json of    a  b  c
0  1  2  3
1  4  5  6
2  7  8  9>


In [193]:
print(data.to_json(orient='records'))

[{"a":1,"b":2,"c":3},{"a":4,"b":5,"c":6},{"a":7,"b":8,"c":9}]


### XML and HTML: Web Scraping

pandas has built-in function, read_html which uses libraries like lxml and Beautiful Soup to automatically parse tables out of HTML files as DataFrame objects. 

In [194]:
!pip3 install lxml

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.1.2[0m[39;49m -> [0m[32;49m22.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip[0m


In [195]:
!pip3 install beautifulsoup4 html5lib

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.1.2[0m[39;49m -> [0m[32;49m22.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/Applications/Xcode.app/Contents/Developer/usr/bin/python3 -m pip install --upgrade pip[0m


In [196]:
tables = pd.read_html('examples/fdic_failed_bank_list.html')

In [197]:
len(tables)

1

In [198]:
failures = tables[0]

In [199]:
failures.head()

Unnamed: 0,Bank Name,City,ST,CERT,Acquiring Institution,Closing Date,Updated Date
0,Allied Bank,Mulberry,AR,91,Today's Bank,"September 23, 2016","November 17, 2016"
1,The Woodbury Banking Company,Woodbury,GA,11297,United Bank,"August 19, 2016","November 17, 2016"
2,First CornerStone Bank,King of Prussia,PA,35312,First-Citizens Bank & Trust Company,"May 6, 2016","September 6, 2016"
3,Trust Company Bank,Memphis,TN,9956,The Bank of Fayette County,"April 29, 2016","September 6, 2016"
4,North Milwaukee State Bank,Milwaukee,WI,20364,First-Citizens Bank & Trust Company,"March 11, 2016","June 16, 2016"


In [200]:
close_timestamps = pd.to_datetime(failures['Closing Date'])

In [201]:
close_timestamps.dt.year.value_counts()

2010    157
2009    140
2011     92
2012     51
2008     25
       ... 
2004      4
2001      4
2007      3
2003      3
2000      2
Name: Closing Date, Length: 15, dtype: int64

#### Parsing XML with lxml.objectify

XML(eXtensible Markup Language) is another structured data format supporting hierarchical, nested data with metadata.  

In [202]:
from lxml import objectify

In [None]:
path = 'examples/mta_perf/Performance_MNR.xml'
parsed = objectify.parse(open(path))
root = parsed.getroot()

In [None]:
data = []
skip_fields = ['PARENT_SEQ', 'INDICATOR_SEQ',
                   'DESIRED_CHANGE', 'DECIMAL_PLACES']
for elt in root.INDICATOR: 
    el_data = {}
    for child in elt.getchildren(): 
        if child.tag in skip_fields:
            continue
        el_data[child.tag] = child.pyval
    data.append(el_data)


In [None]:
perf = pd.DataFrame(data)

In [None]:
perf.head()

In [205]:
from io import StringIO

In [206]:
tag = '<a href="http://www.google.com">Google</a>'
root = objectify.parse(StringIO(tag)).getroot()

In [207]:
root

<Element a at 0x11a780240>

In [208]:
root.get('href')

'http://www.google.com'

In [209]:
root.text

'Google'

## 6.2 Binary Data Formats

one of the easiest ways to store datea (also known as serialization) efficiently in binary format is using Python's built-in pickle serialization. pandas objects all have a to_pickle method that writes the data to disk in pickle format:

In [210]:
frame = pd.read_csv('examples/ex1.csv')

In [211]:
frame

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [212]:
frame.to_pickle('examples/frame_pickle')

**can read any pickled file by using pandas.read_pickle**

In [213]:
pd.read_pickle('examples/frame_pickle')

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


### Using HDF5 Format

HDF5  is a file format inteded for storing large quantities of scientific array data. supports on the fly compression. good choice for very large data sets, you can read and write small sections of much larger arrays.

In [214]:
frame = pd.DataFrame({'a': np.random.randn(100)})

In [None]:
store = pd.HDFStore('mydata.h5')

In [None]:
store['obj1'] = frame



In [None]:
store['obj1_col'] = frame['a']

In [None]:
store

In [None]:
store['obj1']

In [None]:
store.put('obj2', frame, format='table)')

In [None]:
store.select('obj2', where=['index >= 10 and index <=15'])

In [None]:
store.close()

In [None]:
frame.to_hdf('mydata.h5', 'obj3', format='table')

In [None]:
pd.read_hdf('mydata.h5', 'obj3', where=['index < 5'])

### Reading Microsoft Excel Files

use pandas.read_excel. internally these tools use the add-on packages xlrd and openpyxl to read XLS and SLXS files

In [219]:
import sys
!{sys.executable} -m pip install openpyxl

Collecting openpyxl
  Using cached openpyxl-3.0.10-py2.py3-none-any.whl (242 kB)
Collecting et-xmlfile
  Using cached et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.0.10
You should consider upgrading via the '/opt/homebrew/Cellar/jupyterlab/3.4.3/libexec/bin/python3.10 -m pip install --upgrade pip' command.[0m[33m
[0m

In [220]:
xlsx = pd.ExcelFile('examples/ex1.xlsx')

In [221]:
pd.read_excel(xlsx, 'Sheet1')

Unnamed: 0.1,Unnamed: 0,a,b,c,d,message
0,0,1,2,3,4,hello
1,1,5,6,7,8,world
2,2,9,10,11,12,foo


In [222]:
frame = pd.read_excel('examples/ex1.xlsx', 'Sheet1')

In [223]:
frame

Unnamed: 0.1,Unnamed: 0,a,b,c,d,message
0,0,1,2,3,4,hello
1,1,5,6,7,8,world
2,2,9,10,11,12,foo


to write pandas data to excel format, you must first create and ExcelWriter, the write data to it using pandas objects' to_excel method:

In [224]:
writer = pd.ExcelWriter('examples/ex2.xlsx')

In [225]:
frame.to_excel(writer, 'Sheet1')

In [226]:
writer.save()

In [227]:
frame.to_excel('examples/ex2.xlsx') #can also pass a file path to to_excel and avoid the ExcelWriter

## 6.3 Interacting with Web APIs

many websites have public APIs providing data feeds via JSON or some other format. one easy method to access these APIs from Python is the requests package.

In [228]:
import requests

In [229]:
url = 'https://api.github.com/repos/pandas-dev/pandas/issues'

In [230]:
resp = requests.get(url)

In [231]:
resp

<Response [200]>

_the response object's json method will return a dictionary containing JSON parsed into native Python objects:_

In [232]:
data = resp.json()

In [233]:
data[0]['title']

'REF: Rename exchange -> interchange'

In [234]:
issues = pd.DataFrame(data, columns=['number', 'title', 'labels', 'state'])

In [235]:
issues

Unnamed: 0,number,title,labels,state
0,47888,REF: Rename exchange -> interchange,"[{'id': 4344388253, 'node_id': 'LA_kwDOAA0YD88...",open
1,47887,REF: Change _NULL_DESCRIPTION[datetime] to use...,"[{'id': 4344388253, 'node_id': 'LA_kwDOAA0YD88...",open
2,47886,REF: PandasColumn.describe_categorical returns...,"[{'id': 4344388253, 'node_id': 'LA_kwDOAA0YD88...",open
3,47885,DOC: Add numpydoc SS06 validation,"[{'id': 134699, 'node_id': 'MDU6TGFiZWwxMzQ2OT...",open
4,47884,Pandas string dtype needs from NumPy - prototy...,[],open
...,...,...,...,...
25,47849,Added improvements in to_datetime Error report...,"[{'id': 42670965, 'node_id': 'MDU6TGFiZWw0MjY3...",open
26,47848,Fix styling of `DataFrame` for columns with bo...,"[{'id': 1728592794, 'node_id': 'MDU6TGFiZWwxNz...",open
27,47846,DOC: Clarify sorting and order of categoricals...,"[{'id': 134699, 'node_id': 'MDU6TGFiZWwxMzQ2OT...",open
28,47845,"ENH: Add more ""Offset aliases"" to Timedelta fu...","[{'id': 76812, 'node_id': 'MDU6TGFiZWw3NjgxMg=...",open


## 6.4 Interacting with Databases

data may be in databases. loading data from SQL into a DataFrame in pandas can be straight forward. Example (create SQLite database using Python's built-in sqlite3 driver)

In [7]:
import sqlite3

In [14]:
query = """
CREATE TABLE test
(a VARCHAR(20), b VARCHAR(20),
c REAL, d INTEGER
);"""

In [15]:
con = sqlite3.connect('mydata.sqlite')

In [16]:
con.execute(query)

<sqlite3.Cursor at 0x107f9e540>

In [17]:
con.commit()

In [18]:
data = [('Atlanta', 'Georgia', 1.25, 6),
       ('Tallahassee', 'Florida', 2.6, 3),
       ('Sacramento', 'California', 1.7, 5)]

In [19]:
stmt = "INSERT INTO test VALUES(?, ?, ?, ?)"

In [20]:
con.executemany(stmt, data)

<sqlite3.Cursor at 0x107f9fcc0>

In [21]:
con.commit()

most python SQL drivers (Py0DBC, psycopg2, mySQLdb, pymssql, etc.) return a list of tuples when selecting data from a table:

In [22]:
cursor = con.execute('select * from test')

In [23]:
rows = cursor.fetchall()

In [24]:
rows

[('Atlanta', 'Georgia', 1.25, 6),
 ('Tallahassee', 'Florida', 2.6, 3),
 ('Sacramento', 'California', 1.7, 5)]

you can pass a list of tuples to the DF constructor, but you also need the column names, contained in the cursor's description attribute:

In [25]:
cursor.description

(('a', None, None, None, None, None, None),
 ('b', None, None, None, None, None, None),
 ('c', None, None, None, None, None, None),
 ('d', None, None, None, None, None, None))

In [26]:
pd.DataFrame(rows, columns=[x[0] for x in cursor.description])

Unnamed: 0,a,b,c,d
0,Atlanta,Georgia,1.25,6
1,Tallahassee,Florida,2.6,3
2,Sacramento,California,1.7,5


SQLAlchemy is a popular python sql toolkit that abstracts away many of the common differences between sql databases. pandas has a read_sql function that enables you to read data easily from a general SQLAlchemy connection. 

In [None]:
import sqlalchemy as sqla

In [None]:
db = sqla.create_engine('sqlite:///mydata.sqlite')

In [None]:
pd.read_sql('select * from test', db)

## 6.5 Conclusion