# PyData tour



- `pandas` and `numpy` are the primary libraries used for data analysis in Python
- `pd` and `np` are the conventional Python namespaces used; it is strongly recommended you do the same


In [None]:
import pandas as pd
import numpy as np


- `matplotlib` and `seaborn` are the most popular static data viz libraries in Python
- Again it is strongly recommended you use the `plt` and `sns` namespaces for both!


In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

## Load a data set example: 

In [None]:
url="https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/raw/d546eaee765268bf2f487608c537c05e22e4b221/iris.csv"

df = pd.read_csv(url)
df.head()

#df = sns.load_dataset("iris")

In [None]:
#df.tail()

In [None]:
df.describe()

## Selecting values

In [None]:
df.head()

In [None]:
df.iloc[::3,-2:]

In [None]:
df[['sepal_length', 'sepal_width']].iloc[:2]

## Selecting values continued

In [None]:
df[df.sepal_length>5].describe()

In [None]:
df.head()

In [None]:
#df.index

In [None]:
#df1.to_csv("filename.csv", index = False)

## NumPy Arrays

In [None]:
sepal_len = df["sepal_length"].values
petal_len = df.petal_length.values

In [None]:
sepal_len.shape

In [None]:
np.mean(sepal_len), np.median(sepal_len)

In [None]:
sepal_len[:5]

## Broadcasting

In [None]:
sepal_len.shape

In [None]:
diffs = np.abs(sepal_len[:,np.newaxis] - sepal_len[np.newaxis,:])
diffs.shape

In [None]:
sns.heatmap(diffs);

In [None]:
%%timeit
diffs = np.abs(sepal_len[:,np.newaxis] - sepal_len[np.newaxis,:])

In [None]:
%%timeit
diffs = np.zeros((150,150))

for i in range(150):
    for j in range(150):
        diffs[i,j] = np.abs(sepal_len[i] - sepal_len[j])

## Matplotlib : MATLAB-style API

In [None]:
x = np.arange(0,50, .1)
plt.plot(x, np.sin(x));

## Matplotlib : MATLAB-style API

In [None]:
plt.figure(figsize = (8,3))  # create a plot figure

# create the first of two panels and set current axis
plt.subplot(2, 1, 1) # (rows, columns, panel number)
plt.plot(x, np.sin(x))

# create the second panel and set current axis
plt.subplot(2, 1, 2)
plt.plot(-x, np.cos(x));

## Matplotlib: Object Oriented API

In [None]:
# First create a grid of plots
# ax will be an array of two Axes objects
fig, ax = plt.subplots(2,2, figsize=(12,3))

# Call plot() method on the appropriate object
ax[0,0].plot(x, np.sin(x))
ax[0,1].plot(x, np.cos(x));

## Seaborn overview


In [None]:
tips = sns.load_dataset('tips')
tips['tip_pct'] = 100 * tips['tip'] / tips['total_bill']

In [None]:
with sns.axes_style(style='ticks'):
    grid = sns.FacetGrid(tips, row="sex", col="time", hue= "smoker", 
                         margin_titles=True)
    grid.map(plt.hist, "tip_pct", bins=np.linspace(0, 40, 15), alpha = .5);

In [None]:

g = sns.catplot("day", "total_bill", "sex", data=tips, kind="box")


## feature engineering example

In [None]:
from sklearn.datasets import make_circles

np.random.seed(0)

X, y = make_circles(n_samples=400, factor=.1, noise=.05)

In [None]:
df_X = pd.DataFrame(X, columns = ["f1", "f2"])
df_X["f3"] = df_X.f1**2 + df_X.f2**2

In [None]:
sns.relplot(x = "f1", y = "f2", hue = "f3", data = df_X);

In [1]:
import time

In [2]:
time.now()

AttributeError: module 'time' has no attribute 'now'

In [5]:
time.time("utc")

TypeError: time() takes no arguments (1 given)

In [6]:
import datetime

In [7]:
now = datetime.datetime.now()

In [8]:
print (now.strftime("%Y-%m-%d %H:%M:%S"))

2020-04-30 13:23:54


In [9]:
class BinaryTreeNode(object):

    def __init__(self, value):
        self.value = value
        self.left = None
        self.right = None

    def insert_left(self, value):
        self.left = Test.BinaryTreeNode(value)
        return self.left

    def insert_right(self, value):
        self.right = Test.BinaryTreeNode(value)
        return self.right

In [10]:
tree = Test.BinaryTreeNode(5)
left = tree.insert_left(8)
right = tree.insert_right(6)
left.insert_left(1)
left.insert_right(2)
right.insert_left(3)
right.insert_right(4)

NameError: name 'Test' is not defined

In [12]:
import unittest















# Tests

class Test(unittest.TestCase):

    class BinaryTreeNode(object):

        def __init__(self, value):
            self.value = value
            self.left = None
            self.right = None

        def insert_left(self, value):
            self.left = Test.BinaryTreeNode(value)
            return self.left

        def insert_right(self, value):
            self.right = Test.BinaryTreeNode(value)
            return self.right

    def test_full_tree(self):
        tree = Test.BinaryTreeNode(5)
        left = tree.insert_left(8)
        right = tree.insert_right(6)
        left.insert_left(1)
        left.insert_right(2)
        right.insert_left(3)
        right.insert_right(4)
        result = is_balanced(tree)
        self.assertTrue(result)

    def test_both_leaves_at_the_same_depth(self):
        tree = Test.BinaryTreeNode(3)
        left = tree.insert_left(4)
        right = tree.insert_right(2)
        left.insert_left(1)
        right.insert_right(9)
        result = is_balanced(tree)
        self.assertTrue(result)

    def test_leaf_heights_differ_by_one(self):
        tree = Test.BinaryTreeNode(6)
        left = tree.insert_left(1)
        right = tree.insert_right(0)
        right.insert_right(7)
        result = is_balanced(tree)
        self.assertTrue(result)

    def test_leaf_heights_differ_by_two(self):
        tree = Test.BinaryTreeNode(6)
        left = tree.insert_left(1)
        right = tree.insert_right(0)
        right_right = right.insert_right(7)
        right_right.insert_right(8)
        result = is_balanced(tree)
        self.assertFalse(result)

    def test_three_leaves_total(self):
        tree = Test.BinaryTreeNode(1)
        left = tree.insert_left(5)
        right = tree.insert_right(9)
        right.insert_left(8)
        right.insert_right(5)
        result = is_balanced(tree)
        self.assertTrue(result)

    def test_both_subtrees_superbalanced(self):
        tree = Test.BinaryTreeNode(1)
        left = tree.insert_left(5)
        right = tree.insert_right(9)
        right_left = right.insert_left(8)
        right.insert_right(5)
        right_left.insert_left(7)
        result = is_balanced(tree)
        self.assertFalse(result)

    def test_both_subtrees_superbalanced_two(self):
        tree = Test.BinaryTreeNode(1)
        left = tree.insert_left(2)
        right = tree.insert_right(4)
        left.insert_left(3)
        left_right = left.insert_right(7)
        left_right.insert_right(8)
        right_right = right.insert_right(5)
        right_right_right = right_right.insert_right(6)
        right_right_right.insert_right(9)
        result = is_balanced(tree)
        self.assertFalse(result)

    def test_only_one_node(self):
        tree = Test.BinaryTreeNode(1)
        result = is_balanced(tree)
        self.assertTrue(result)

    def test_linked_list_tree(self):
        tree = Test.BinaryTreeNode(1)
        right = tree.insert_right(2)
        right_right = right.insert_right(3)
        right_right.insert_right(4)
        result = is_balanced(tree)
        self.assertTrue(result)



In [39]:
def is_balanced(tree_root):
    # Determine if the tree is superbalanced
    max_height = 0
    min_height = 0
    super_balanced = True
    
    queue = [(tree_root, [tree_root])]
    while queue:
        vertex, path = queue.pop()
        
        if vertex.left is None:
            if max_height < len(path):
                max_height = len(path) 
            if min_height > len(path):
                min_height = len(path)
        else:
            queue.append((vertex.left, path + [vertex.left]))
        
        if vertex.right is None:
            if max_height < len(path):
                max_height = len(path) 
            if min_height > len(path):
                min_height = len(path)
        else:
            queue.append((vertex.right, path + [vertex.left]))
    
        
        if max_height - min_height > 1:
            super_balanced = False
            break

    return super_balanced







In [26]:
tree = Test.BinaryTreeNode(6)


In [21]:
left = tree.insert_left(1)
right = tree.insert_right(0)

In [24]:
tree.right.value

0

In [None]:
right_right = right.insert_right(7)

In [25]:
tree.left

<__main__.Test.BinaryTreeNode at 0x118d17c88>

In [15]:
left = tree.insert_left(1)
right = tree.insert_right(0)
right_right = right.insert_right(7)
right_right.insert_right(8)
result = is_balanced(tree)

False

In [40]:
tree = Test.BinaryTreeNode(1)


In [41]:
right = tree.insert_right(2)

In [42]:
tree.right.left

In [43]:
right = tree.insert_right(2)
right_right = right.insert_right(3)
right_right.insert_right(4)
result = is_balanced(tree)

1
2


AttributeError: 'NoneType' object has no attribute 'left'