# Data Structures Revisited

24 July 2017 | Python

In [171]:
import pandas as pd
from blist import sorteddict
from scipy.spatial import KDTree

### Data frames

In [172]:
# build a one-dimensional Series of 64-bit floats (default integer index)
simpleSeries = pd.Series(data=[42, 55, 73], dtype='float64')
print(simpleSeries)

0    42.0
1    55.0
2    73.0
dtype: float64


In [173]:
# build a Series whose values are labelled by a custom string index
indexSeries = pd.Series(
    data=[42, 55, 73],
    index=["electron", "proton", "neutron"],
    dtype='float64',
)
print(indexSeries)

electron    42.0
proton      55.0
neutron     73.0
dtype: float64


In [174]:
# look up a single value by its index label
indexSeries.loc['electron']

42.0

In [175]:
# slice by index label; unlike positional slices, a label slice
# includes BOTH endpoints ('electron' through 'neutron')
indexSeries.loc['electron':'neutron']

electron    42.0
proton      55.0
neutron     73.0
dtype: float64

In [176]:
# slice by integer position (everything from the second element onward);
# .iloc makes it explicit that 1 is a position, not an index label, which
# a bare indexSeries[1:] leaves ambiguous on a label-indexed series
indexSeries.iloc[1:]

proton     55.0
neutron    73.0
dtype: float64

In [177]:
# build a Series from a dictionary: keys become the index labels,
# values become the data
dictSeries = pd.Series(dict(electron=6, neutron=28, proton=496, neutrino=8128))
print(dictSeries)

electron       6
neutrino    8128
neutron       28
proton       496
dtype: int64


In [178]:
# element-wise sum aligned on index labels; a label present in only one
# of the two series ('neutrino') yields NaN
indexSeries.add(dictSeries)

electron     48.0
neutrino      NaN
neutron     101.0
proton      551.0
dtype: float64

In [179]:
# assemble a data frame with one column per series; rows are aligned on
# the index labels and gaps are filled with NaN
combinedSeries = pd.DataFrame(dict(A=indexSeries, B=dictSeries))
print(combinedSeries)

             A     B
electron  42.0     6
neutrino   NaN  8128
neutron   73.0    28
proton    55.0   496


In [180]:
# select every row of column 'A'; the result is a Series named 'A'
combinedSeries.loc[:, 'A']

electron    42.0
neutrino     NaN
neutron     73.0
proton      55.0
Name: A, dtype: float64

In [181]:
# add a new row, labelled 'antineutrino', below the rows of 'combinedSeries'.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4
# and removed in pandas 2.0. The new row supplies no value for column 'B',
# so that cell becomes NaN (and 'B' is upcast to float).
appendSeries = pd.concat([
    combinedSeries,
    pd.DataFrame({'A': [-8128]}, index=['antineutrino']),
])

print(appendSeries)

                   A       B
electron        42.0     6.0
neutrino         NaN  8128.0
neutron         73.0    28.0
proton          55.0   496.0
antineutrino -8128.0     NaN


In [182]:
# remove the row labelled 'neutron' (axis 0 = rows).
# NOTE: this rebinds 'appendSeries' to the result, so re-running the cell
# raises KeyError because the label is already gone.
appendSeries = appendSeries.drop(labels=['neutron'], axis=0)
print(appendSeries)

                   A       B
electron        42.0     6.0
neutrino         NaN  8128.0
proton          55.0   496.0
antineutrino -8128.0     NaN


In [183]:
# swap rows and columns of the data frame
combinedSeries.transpose()

Unnamed: 0,electron,neutrino,neutron,proton
A,42.0,,73.0,55.0
B,6.0,8128.0,28.0,496.0


In [184]:
# boolean mask: True wherever a cell is greater than 120 (NaN compares False)
combinedSeries.gt(120)

Unnamed: 0,A,B
electron,False,False
neutrino,False,True
neutron,False,False
proton,False,True


In [185]:
# add a boolean column 'large' that is True when either 'A' or 'B'
# exceeds 120 in that row
combinedSeries['large'] = combinedSeries[['A', 'B']].gt(120).any(axis=1)
print(combinedSeries)

             A     B  large
electron  42.0     6  False
neutrino   NaN  8128   True
neutron   73.0    28  False
proton    55.0   496   True


In [186]:
# remove the 'large' column again (axis 1 = columns)
combinedSeries = combinedSeries.drop(labels='large', axis=1)
print(combinedSeries)

             A     B
electron  42.0     6
neutrino   NaN  8128
neutron   73.0    28
proton    55.0   496


### B-trees (sorted dictionaries)

In [187]:
# create a sorted dictionary; entries are kept ordered by key, so the
# order in which the keyword arguments are given does not matter
b = sorteddict(
    birthday=[1750, 1, 1],
    first="Michael",
    last="Sjoeberg",
)
print(b)

sorteddict({'birthday': [1750, 1, 1], 'first': 'Michael', 'last': 'Sjoeberg'})


In [188]:
# insert a new key/value pair; the printed result shows the new key
# placed in sorted position among the existing keys
b['email'] = "michael@doolio.co"

print(b)

sorteddict({'birthday': [1750, 1, 1], 'email': 'michael@doolio.co', 'first': 'Michael', 'last': 'Sjoeberg'})


In [189]:
# collect the keys into a plain list (they come back in sorted order)
[key for key in b.keys()]

['birthday', 'email', 'first', 'last']

### K-dimensional trees

In [190]:
# the two-dimensional (x, y) points that will be indexed by the k-d tree
points = [
    (1, 2),
    (3, 2),
    (5, 5),
    (2, 1),
    (4, 3),
    (1, 5),
]

In [191]:
# build a k-d tree over the points; 'tree.data' holds the indexed points
# as an array with one row per point
tree = KDTree(data=points)
print(tree.data)

[[1 2]
 [3 2]
 [5 5]
 [2 1]
 [4 3]
 [1 5]]


In [192]:
# query the single nearest neighbour (k=1) of the point (4.5, 1.25);
# returns the distance to it and its row index in 'tree.data'
dist, idx = tree.query(x=[(4.5, 1.25)], k=1)

In [193]:
# distance from the query point to its nearest neighbour in the tree
# (one entry per query point)
print(dist)

[ 1.67705098]


In [194]:
# row index of the nearest neighbour within 'tree.data'
# (one entry per query point)
print(idx)

[1]


In [195]:
# look up the coordinates of the nearest neighbour by its row index
tree.data[idx, :]

array([[3, 2]])

<i>Notebook by <a href="https://www.michaelsjoeberg.com">Michael Sjoeberg</a>, updated 24 July 2017.</i>