# CHAPTER 6: 
# Data Loading, Storage, and File Formats

## Reading and Writing Data in Text Format

In [2]:
import numpy as np
import pandas as pd
np.random.seed(12345)
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(10, 6))
np.set_printoptions(precision=4, suppress=True)

In [143]:
with open('examples/ex1.csv') as f:
    for line in f:
        print(line)

a,b,c,d,message

1,2,3,4,hello

5,6,7,8,world

9,10,11,12,foo


In [144]:
df = pd.read_csv('examples/ex1.csv')
df

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [145]:
pd.read_table('examples/ex1.csv', sep=',')

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [146]:
with open('examples/ex2.csv') as f:
    for line in f:
        print(line)


1,2,3,4,hello

5,6,7,8,world

9,10,11,12,foo


In [147]:
pd.read_csv('examples/ex2.csv', header=None)


Unnamed: 0,0,1,2,3,4
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [148]:
pd.read_csv('examples/ex2.csv', names=['a', 'b', 'c', 'd', 'message'])

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [149]:
names = ['a', 'b', 'c', 'd', 'message']
pd.read_csv('examples/ex2.csv', names=names, index_col='message')


Unnamed: 0_level_0,a,b,c,d
message,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
hello,1,2,3,4
world,5,6,7,8
foo,9,10,11,12


In [150]:
with open('examples/csv_mindex.csv') as f:
    for line in f:
        print(line)



key1,key2,value1,value2

one,a,1,2

one,b,3,4

one,c,5,6

one,d,7,8

two,a,9,10

two,b,11,12

two,c,13,14

two,d,15,16



In [151]:
parsed = pd.read_csv('examples/csv_mindex.csv',
                     index_col=['key1', 'key2'])
parsed

Unnamed: 0_level_0,Unnamed: 1_level_0,value1,value2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16


In [152]:
list(open('examples/ex3.txt'))

['            A         B         C\n',
 'aaa -0.264438 -1.026059 -0.619500\n',
 'bbb  0.927272  0.302904 -0.032399\n',
 'ccc -0.264273 -0.386314 -0.217601\n',
 'ddd -0.871858 -0.348382  1.100491\n']

In [153]:
result = pd.read_table('examples/ex3.txt', sep='\s+')
result

Unnamed: 0,A,B,C
aaa,-0.264438,-1.026059,-0.6195
bbb,0.927272,0.302904,-0.032399
ccc,-0.264273,-0.386314,-0.217601
ddd,-0.871858,-0.348382,1.100491


In [154]:
with open('examples/ex4.csv') as f:
    for line in f:
        print(line)


# hey!

a,b,c,d,message

# just wanted to make things more difficult for you

# who reads CSV files with computers, anyway?

1,2,3,4,hello

5,6,7,8,world

9,10,11,12,foo


In [155]:
pd.read_csv('examples/ex4.csv', skiprows=[0, 2, 3])

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [156]:
with open('examples/ex5.csv') as f:
    for line in f:
        print(line)

something,a,b,c,d,message

one,1,2,3,4,NA

two,5,6,,8,world

three,9,10,11,12,foo


In [157]:
result = pd.read_csv('examples/ex5.csv')

result

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo


In [158]:
pd.isnull(result)

Unnamed: 0,something,a,b,c,d,message
0,False,False,False,False,False,True
1,False,False,False,True,False,False
2,False,False,False,False,False,False


In [159]:
result = pd.read_csv('examples/ex5.csv', na_values=['NULL'])
result

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo


In [160]:
sentinels = {'message': ['foo', 'NA'], 'something': ['two']}
pd.read_csv('examples/ex5.csv', na_values=sentinels)

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,,5,6,,8,world
2,three,9,10,11.0,12,


### Reading Text Files in Pieces

In [161]:
pd.options.display.max_rows = 10

In [162]:
result = pd.read_csv('examples/ex6.csv')
result

Unnamed: 0,one,two,three,four,key
0,0.467976,-0.038649,-0.295344,-1.824726,L
1,-0.358893,1.404453,0.704965,-0.200638,B
2,-0.501840,0.659254,-0.421691,-0.057688,G
3,0.204886,1.074134,1.388361,-0.982404,R
4,0.354628,-0.133116,0.283763,-0.837063,Q
...,...,...,...,...,...
9995,2.311896,-0.417070,-1.409599,-0.515821,L
9996,-0.479893,-0.650419,0.745152,-0.646038,E
9997,0.523331,0.787112,0.486066,1.093156,K
9998,-0.362559,0.598894,-1.843201,0.887292,G


In [163]:
pd.read_csv('examples/ex6.csv', nrows=5)

Unnamed: 0,one,two,three,four,key
0,0.467976,-0.038649,-0.295344,-1.824726,L
1,-0.358893,1.404453,0.704965,-0.200638,B
2,-0.50184,0.659254,-0.421691,-0.057688,G
3,0.204886,1.074134,1.388361,-0.982404,R
4,0.354628,-0.133116,0.283763,-0.837063,Q


In [164]:
chunker = pd.read_csv('examples/ex6.csv', chunksize=1000)
chunker

<pandas.io.parsers.readers.TextFileReader at 0x28c4fb6b2e0>

In [165]:
chunker = pd.read_csv('examples/ex6.csv', chunksize=1000)
tot = pd.Series([], dtype = 'float64')
for piece in chunker:
    tot = tot.add(piece['key'].value_counts(), fill_value=0)
tot = tot.sort_values(ascending=False)


In [166]:
tot[:10]

E    368.0
X    364.0
L    346.0
O    343.0
Q    340.0
M    338.0
J    337.0
F    335.0
K    334.0
H    330.0
dtype: float64

### Writing Data Out to Text Format

In [1]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
import sys

In [5]:
data = pd.read_csv('ex5.csv')

In [7]:
data

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo


In [9]:
data.to_csv('ex5.csv')

In [10]:
!type out.csv

,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo


In [11]:
data.to_csv(sys.stdout, sep='|')

|something|a|b|c|d|message
0|one|1|2|3.0|4|
1|two|5|6||8|world
2|three|9|10|11.0|12|foo


In [12]:
data.to_csv(sys.stdout, na_rep='NULL')

,something,a,b,c,d,message
0,one,1,2,3.0,4,NULL
1,two,5,6,NULL,8,world
2,three,9,10,11.0,12,foo


In [13]:
data.to_csv(sys.stdout, index=False, header=False)

one,1,2,3.0,4,
two,5,6,,8,world
three,9,10,11.0,12,foo


In [14]:
data.to_csv(sys.stdout, index=False, columns=['a', 'b', 'c'])

a,b,c
1,2,3.0
5,6,
9,10,11.0


In [15]:
dates = pd.date_range('1/1/2000', periods=7)

In [16]:
ts = Series(np.arange(7), index=dates)

In [17]:
ts.to_csv('tseries.csv')

In [18]:
!type tseries.csv

,0
2000-01-01,0
2000-01-02,1
2000-01-03,2
2000-01-04,3
2000-01-05,4
2000-01-06,5
2000-01-07,6


In [19]:
Series.from_csv('tseries.csv', parse_dates=True)
#Deprecated since version 0.21.0: Use pandas.read_csv() instead.

AttributeError: type object 'Series' has no attribute 'from_csv'

### Manually Working with Delimited Formats

In [20]:
!type ex7.csv

"a","b","c"
"1","2","3"
"1","2","3"


In [21]:
import csv
f = open('ex7.csv')
reader = csv.reader(f)

In [22]:
for line in reader:
    print(line)

['a', 'b', 'c']
['1', '2', '3']
['1', '2', '3']


In [23]:
lines = list(csv.reader(open('ex7.csv')))

In [24]:
header, values = lines[0], lines[1:]

In [25]:
data_dict = {h: v for h, v in zip(header, zip(*values))}

In [26]:
data_dict

{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}

In [27]:
class my_dialect(csv.Dialect):
    lineterminator = '\n'
    delimiter = ';'
    quotechar = '"'
    quoting = csv.QUOTE_MINIMAL
reader = csv.reader(f, dialect=my_dialect)

In [28]:
reader = csv.reader(f, delimiter='|')

In [30]:
with open('mydata.csv', 'w') as f:
    writer = csv.writer(f, dialect=my_dialect)
    writer.writerow(('one', 'two', 'three'))
    writer.writerow(('1', '2', '3'))
    writer.writerow(('4', '5', '6'))
    writer.writerow(('7', '8', '9'))

### JSON Data

In [1]:
import json
obj = """
{"name": "Wes",
"places_lived": ["United States", "Spain", "Germany"],
"pet": null,
"siblings": [{"name": "Scott", "age": 25, "pet": "Zuko"},
{"name": "Katie", "age": 33, "pet": "Cisco"}]
}
"""
result=json.loads(obj)
result

{'name': 'Wes',
 'places_lived': ['United States', 'Spain', 'Germany'],
 'pet': None,
 'siblings': [{'name': 'Scott', 'age': 25, 'pet': 'Zuko'},
  {'name': 'Katie', 'age': 33, 'pet': 'Cisco'}]}

In [2]:
asjson = json.dumps(result)

In [4]:
from pandas import DataFrame
siblings = DataFrame(result['siblings'], columns=['name', 'age'])
siblings

Unnamed: 0,name,age
0,Scott,25
1,Katie,33


### XML and HTML: Web Scraping

In [25]:
from lxml.html import parse
from urllib.request import urlopen,Request
url='http://finance.yahoo.com/q/op?s=AAPL+Options'
req=Request(url,headers={'User-Agent': 'Mozilla/5.0'})
parsed=parse(urlopen(req))
doc=parsed.getroot()
doc


<Element html at 0x2ae0a7dc0f0>

In [26]:
links = doc.findall('.//a')
links[15:20]

[<Element a at 0x2ae0a86d270>,
 <Element a at 0x2ae0a86f250>,
 <Element a at 0x2ae0a86d3b0>,
 <Element a at 0x2ae0a86d770>,
 <Element a at 0x2ae0a86d4f0>]

In [44]:
lnk = links[27]
lnk

<Element a at 0x2ae0a86df40>

In [45]:
lnk.get('href')

'https://yahoo.uservoice.com/forums/382977'

In [46]:
lnk.text_content()

'Contact Us'

In [47]:
urls = [lnk.get('href') for lnk in doc.findall('.//a')]
urls[-10:]

['https://help.yahoo.com/kb/finance-for-web/SLN2310.html?locale=en_US',
 'https://help.yahoo.com/kb/finance-for-web',
 'https://yahoo.uservoice.com/forums/382977',
 'https://policies.oath.com/us/en/oath/privacy/index.html',
 'https://policies.oath.com/us/en/oath/privacy/adinfo/index.html',
 'https://legal.yahoo.com/us/en/yahoo/terms/otos/index.html',
 'https://finance.yahoo.com/sitemap/',
 'https://twitter.com/YahooFinance',
 'https://facebook.com/yahoofinance',
 'https://www.linkedin.com/company/yahoo-finance']

In [52]:
tables = doc.findall('.//table')
calls = tables[0]
puts = tables[1]

In [53]:
rows = calls.findall('.//tr')

In [55]:
def _unpack(row, kind='td'):
    elts = row.findall('.//%s' % kind)
    return [val.text_content() for val in elts]

In [56]:
_unpack(rows[0], kind='th')

['Contract Name',
 'Last Trade Date',
 'Strike',
 'Last Price',
 'Bid',
 'Ask',
 'Change',
 '% Change',
 'Volume',
 'Open Interest',
 'Implied Volatility']

In [57]:
_unpack(rows[1], kind='td')

['AAPL221202C00050000',
 '2022-11-25 12:34PM EST',
 '50.00',
 '98.00',
 '97.80',
 '98.55',
 '-1.91',
 '-1.91%',
 '2',
 '0',
 '309.38%']

In [58]:
from pandas.io.parsers import TextParser
def parse_options_data(table):
    rows = table.findall('.//tr')
    header = _unpack(rows[0], kind='th')
    data = [_unpack(r) for r in rows[1:]]
    return TextParser(data, names=header).get_chunk()

In [59]:
call_data = parse_options_data(calls)
put_data = parse_options_data(puts)
call_data[:10]

Unnamed: 0,Contract Name,Last Trade Date,Strike,Last Price,Bid,Ask,Change,% Change,Volume,Open Interest,Implied Volatility
0,AAPL221202C00050000,2022-11-25 12:34PM EST,50.0,98.0,97.8,98.55,-1.91,-1.91%,2,0,309.38%
1,AAPL221202C00075000,2022-10-28 1:35PM EST,75.0,81.35,72.85,73.8,0.0,-,15,0,233.40%
2,AAPL221202C00090000,2022-11-25 11:28AM EST,90.0,57.97,57.85,58.6,10.62,+22.43%,15,0,161.33%
3,AAPL221202C00100000,2022-11-25 11:39AM EST,100.0,47.82,47.85,48.55,-0.88,-1.81%,80,0,126.17%
4,AAPL221202C00105000,2022-11-17 9:36AM EST,105.0,42.48,42.85,43.6,0.0,-,1,0,116.02%
5,AAPL221202C00110000,2022-11-25 11:06AM EST,110.0,38.44,37.9,38.45,-2.91,-7.04%,60,0,94.14%
6,AAPL221202C00115000,2022-11-25 11:58AM EST,115.0,33.4,32.9,33.5,4.9,+17.19%,1,0,85.55%
7,AAPL221202C00120000,2022-11-25 11:05AM EST,120.0,28.49,27.9,28.65,-2.16,-7.05%,16,0,80.08%
8,AAPL221202C00123000,2022-11-25 11:34AM EST,123.0,24.79,24.9,25.65,-3.76,-13.17%,1,0,72.07%
9,AAPL221202C00125000,2022-11-25 12:02PM EST,125.0,23.43,22.95,23.5,-1.69,-6.73%,9,94,62.89%


In [61]:
from lxml import objectify
path = 'C:/Users/huy/Desktop/Performance_MNR.xml'
parsed = objectify.parse(open(path))
root = parsed.getroot()

In [66]:
data = []
skip_fields = ['PARENT_SEQ', 'INDICATOR_SEQ','DESIRED_CHANGE', 'DECIMAL_PLACES']
for elt in root:
    el_data = {}
    for child in elt.getchildren():
        if child.tag in skip_fields:
            continue
        el_data[child.tag] = child.pyval
        data.append(el_data)

In [68]:
perf = DataFrame(data)
perf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   AGENCY_NAME     12 non-null     object 
 1   INDICATOR_NAME  12 non-null     object 
 2   DESCRIPTION     12 non-null     object 
 3   PERIOD_YEAR     12 non-null     int64  
 4   PERIOD_MONTH    12 non-null     int64  
 5   CATEGORY        12 non-null     object 
 6   FREQUENCY       12 non-null     object 
 7   INDICATOR_UNIT  12 non-null     object 
 8   YTD_TARGET      12 non-null     float64
 9   YTD_ACTUAL      12 non-null     object 
 10  MONTHLY_TARGET  12 non-null     float64
 11  MONTHLY_ACTUAL  12 non-null     object 
dtypes: float64(2), int64(2), object(8)
memory usage: 1.2+ KB


In [71]:
from io import StringIO
tag = '<a href="http://www.google.com">Google</a>'
root = objectify.parse(StringIO(tag)).getroot()

In [72]:
root

<Element a at 0x2ae0c1e6fc0>

In [73]:
root.get('href')

'http://www.google.com'

In [74]:
root.text

'Google'

## Binary Data Formats

In [4]:
frame = pd.read_csv('examples/ex1.csv')
frame

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [5]:
frame.to_pickle('examples/frame_pickle')

In [6]:
pd.read_pickle('examples/frame_pickle')

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


### Using HDF5 Format

In [10]:
store = pd.HDFStore('mydata.h5')
store['obj1'] = frame
store['obj1_col'] = frame['a']
store

<class 'pandas.io.pytables.HDFStore'>
File path: mydata.h5

In [11]:
store['obj1']

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


### Reading Microsoft Excel Files

In [38]:
import pandas as pd
import xlrd
xlsx_file = pd.ExcelFile('day.xlsx')
df = xlsx_file.parse('Worksheet')
df

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2,3,2011-01-03,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
3,4,2011-01-04,1,0,1,0,2,1,1,0.200000,0.212122,0.590435,0.160296,108,1454,1562
4,5,2011-01-05,1,0,1,0,3,1,1,0.226957,0.229270,0.436957,0.186900,82,1518,1600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
726,727,2012-12-27,1,1,12,0,4,1,2,0.254167,0.226642,0.652917,0.350133,247,1867,2114
727,728,2012-12-28,1,1,12,0,5,1,2,0.253333,0.255046,0.590000,0.155471,644,2451,3095
728,729,2012-12-29,1,1,12,0,6,0,2,0.253333,0.242400,0.752917,0.124383,159,1182,1341
729,730,2012-12-30,1,1,12,0,0,0,1,0.255833,0.231700,0.483333,0.350754,364,1432,1796


## Interacting with HTML and Web APIs

In [1]:
import pandas as pd
import numpy as np

In [2]:
import requests

url = "https://twitter154.p.rapidapi.com/search/search"

querystring = {"query":"#python%20pandas","section":"top","min_retweets":"20","min_likes":"20","limit":"5","start_date":"2022-01-01","language":"en"}

headers = {"X-RapidAPI-Key": "3ca3abc1f7mshfd96a9ce916a026p1dcaabjsnaf1ec9ecd60f",
           "X-RapidAPI-Host": "twitter154.p.rapidapi.com"
}

response = requests.request("GET", url, headers=headers, params=querystring)


In [3]:
response

<Response [200]>

In [4]:
import json

In [5]:
data_tweet = json.loads(response.text)


In [6]:
data_tweet['results']

[{'tweet_id': '1500450317481222148',
  'creation_date': 'Sun Mar 06 12:36:47 +0000 2022',
  'text': 'I recently learned of a neat pandas.DataFrame method called `select_dtypes` which is a much cleaner way of filtering columns on type than checking each column type in a for loop! #python #Pandas https://t.co/Btelyg7T4U',
  'media_url': ['https://pbs.twimg.com/media/FNKrlq1XEAEvP1T.png'],
  'video_url': None,
  'user': {'creation_date': 'Sat Oct 23 02:05:18 +0000 2021',
   'user_id': '1451731326697156619',
   'username': 'pypeaday',
   'name': 'PypeADay🐍',
   'follower_count': 341,
   'following_count': 131,
   'favourites_count': 1547,
   'is_private': False,
   'is_verified': False,
   'location': '',
   'profile_pic_url': 'https://pbs.twimg.com/profile_images/1451731539142844418/ifYGM2vo_normal.jpg',
   'profile_banner_url': 'https://pbs.twimg.com/profile_banners/1451731326697156619/1662732097',
   'description': "Jik. Pipes and Python - usually not at the same time. I'm well-grounded

In [7]:
tweet_fields = ['creation_date', 'text']

In [8]:
tweets = pd.DataFrame(data_tweet['results'], columns=tweet_fields)

In [9]:
tweets

Unnamed: 0,creation_date,text
0,Sun Mar 06 12:36:47 +0000 2022,I recently learned of a neat pandas.DataFrame ...
1,Sun Jul 03 02:25:55 +0000 2022,Anatomy of Pandas data structures\n\n#Pandas i...
2,Mon May 09 11:27:09 +0000 2022,Want to speedup Pandas DataFrame operations? L...
3,Sat Mar 19 17:37:00 +0000 2022,I've seen lots of great EDA tips for working i...
4,Tue Apr 05 14:08:36 +0000 2022,If you've ever wanted to convert a PDF table t...


In [10]:
tweets.iloc[3]

creation_date                       Sat Mar 19 17:37:00 +0000 2022
text             I've seen lots of great EDA tips for working i...
Name: 3, dtype: object

## Interacting with Databases

In [11]:
import sqlite3
query = """
CREATE TABLE test
(a VARCHAR(20), b VARCHAR(20),
c REAL, d INTEGER );"""
con = sqlite3.connect(':memory:') 
con.execute(query)
con.commit()

In [12]:
data = [('Atlanta', 'Georgia', 1.25, 6), ('Tallahassee', 'Florida', 2.6, 3), ('Sacramento', 'California', 1.7, 5)]
stmt = "INSERT INTO test VALUES(?, ?, ?, ?)"
con.executemany(stmt, data) 
con.commit()

In [13]:
cursor = con.execute('select * from test')

In [14]:
rows = cursor.fetchall()

In [15]:
rows

[('Atlanta', 'Georgia', 1.25, 6),
 ('Tallahassee', 'Florida', 2.6, 3),
 ('Sacramento', 'California', 1.7, 5)]

In [16]:
a = np.array(cursor.description)
a[:,0]

array(['a', 'b', 'c', 'd'], dtype=object)

In [17]:
pd.DataFrame(rows, columns=a[:,0])

Unnamed: 0,a,b,c,d
0,Atlanta,Georgia,1.25,6
1,Tallahassee,Florida,2.6,3
2,Sacramento,California,1.7,5


In [18]:
import pandas.io.sql as sql

In [19]:
sql.read_sql('select * from test', con)

Unnamed: 0,a,b,c,d
0,Atlanta,Georgia,1.25,6
1,Tallahassee,Florida,2.6,3
2,Sacramento,California,1.7,5


### Storing and Loading Data in MongoDB

In [20]:
from pymongo import MongoClient

In [21]:
client = MongoClient('localhost', port=27017)

In [27]:
tweets_collection = client.db.tweets_database_collection

In [28]:
for tweet in data_tweet['results']: 
    tweets_collection.insert_one(tweet)

In [29]:
cursor = tweets_collection.find({"user.username": "thedataprof"})

In [30]:
tweet_fields = ['creation_date', 'text'] 
result = pd.DataFrame(list(cursor), columns=tweet_fields)

In [31]:
result

Unnamed: 0,creation_date,text
0,Sun Jul 03 02:25:55 +0000 2022,Anatomy of Pandas data structures\n\n#Pandas i...
