$\newcommand{\xv}{\mathbf{x}}
\newcommand{\Xv}{\mathbf{X}}
\newcommand{\yv}{\mathbf{y}}
\newcommand{\zv}{\mathbf{z}}
\newcommand{\av}{\mathbf{a}}
\newcommand{\Wv}{\mathbf{W}}
\newcommand{\wv}{\mathbf{w}}
\newcommand{\tv}{\mathbf{t}}
\newcommand{\Tv}{\mathbf{T}}
\newcommand{\muv}{\boldsymbol{\mu}}
\newcommand{\sigmav}{\boldsymbol{\sigma}}
\newcommand{\phiv}{\boldsymbol{\phi}}
\newcommand{\Phiv}{\boldsymbol{\Phi}}
\newcommand{\Sigmav}{\boldsymbol{\Sigma}}
\newcommand{\Lambdav}{\boldsymbol{\Lambda}}
\newcommand{\half}{\frac{1}{2}}
\newcommand{\argmax}[1]{\underset{#1}{\operatorname{argmax}}}
\newcommand{\argmin}[1]{\underset{#1}{\operatorname{argmin}}}$


<h1><center>Weather or Not, There is Crime</center></h1>
<center>
*Jason Stock, Tom Cavey, Amber Lee*  
*Relationships between crime and weather patterns in Chicago*
</center>

In [3]:
import neuralnetworks as nn
import mlutils as ml
import pandas as pd
import numpy as np 
import random
import seaborn as sns
import matplotlib.pylab as plt
from matplotlib.pyplot import cm
import pickle

class Models:
    """One neural network per district, predicting crime counts from weather.

    Each network is trained from that district's CSV file and kept in
    ``self.trained_nn`` so that index ``i`` holds the network for district
    ``i + 1``.

    NOTE(review): ``train`` min-max normalizes the weather columns when
    ``normalize=True``, while ``use`` passes its input to the networks
    unchanged. Callers must supply inputs on the same scale the networks
    were trained on -- confirm this is handled by the caller.
    """

    def __init__(self, num_districts, networks):
        """Initialize number of districts and respective networks.

        Args:
            num_districts: Number of districts; ``train`` reads files
                ``wc1.csv`` .. ``wc<num_districts>.csv``.
            networks: Sequence indexed by district (0-based) holding the
                hidden-layer specification passed to ``nn.NeuralNetwork``.
        """
        self.num_districts = num_districts
        self.networks = networks
        # Instance attribute rather than a class-level list: a class-level
        # list is shared by every instance and is not stored by pickle,
        # which only saves the instance __dict__.
        self.trained_nn = []

    def sampler(self, data, features, targets):
        """Sample the data to set features, X, and targets, T.

        Args:
            data: DataFrame to select columns from.
            features: Positional column indices for X.
            targets: Positional column indices for T.

        Returns:
            Tuple ``(X, T)`` of numpy arrays.
        """
        X = data.iloc[:, np.r_[features]]
        T = data.iloc[:, np.r_[targets]]
        return np.array(X), np.array(T)

    def train(self, iterations=100, normalize=True, partition=True):
        """Train each network and save the trained nnet object to trained_nn.

        Any previously trained networks are replaced, so calling ``train``
        twice cannot misalign list index and district number.

        Args:
            iterations: Iteration count passed to ``nnet.train``.
            normalize: When true, min-max scale the four weather columns
                of each district's data before training.
            partition: When true, train on the 80% training split returned
                by ``ml.partition``; otherwise train on all rows.
        """
        column_names = ['date', 'dry', 'wet', 'wind', 'humidity', 'district', 'homicide', 'robbery',
                        'battery', 'assault', 'burglary', 'theft', 'motor', 'weapons']
        # Build into a local list and publish at the end, so a failure part
        # way through does not leave a half-filled trained_nn behind.
        trained = []
        for district in range(self.num_districts):
            # The first 70 rows are skipped -- reason not visible here,
            # TODO confirm. copy() makes the slice an independent frame so
            # the in-place normalization below is unambiguous.
            data = pd.read_csv('../Output/WeeklyOutput/wc'+str(district + 1)+'.csv', sep=',',
                               low_memory=False, names=column_names).iloc[70:].copy()
            if normalize:
                cols_to_norm = ['dry', 'wet', 'wind', 'humidity']
                data[cols_to_norm] = data[cols_to_norm].apply(lambda x: (x - x.min()) / (x.max() - x.min()))

            # Columns 1-4 are the weather features, 6-13 the crime counts.
            X, T = self.sampler(data, range(1, 5), range(6, 14))

            if partition:
                train_f = 0.80
                X, T, _, _ = ml.partition(X, T, (train_f, 1 - train_f))

            nnet = nn.NeuralNetwork(X.shape[1], self.networks[district], T.shape[1])
            nnet.train(X, T, iterations)
            trained.append(nnet)

        self.trained_nn = trained

    def use(self, data, return_all=True, district=None):
        """Prints a table of all or specific district results based off provided data.

        The data shall be the weather for the day in the form:
        [dry-bulb-temp, wet-bulb-temp, wind-speed, relative-humidity]

        Args:
            data: Weather sample handed unchanged to each network's ``use``.
            return_all: When true, print one row per trained network.
            district: 1-based district number, used only when
                ``return_all`` is false.

        Raises:
            IndexError: If ``district`` is outside 1..len(trained_nn).
        """
        results = []

        if return_all:
            for i, network in enumerate(self.trained_nn):
                Y = np.round(network.use(data)[0])
                results.append(np.insert(Y, 0, i + 1))
        elif district is not None:
            # Reject 0 and negatives explicitly: district - 1 would wrap
            # around and silently return another district's prediction.
            if not 1 <= district <= len(self.trained_nn):
                raise IndexError('district must be between 1 and %d' % len(self.trained_nn))
            Y = np.round(self.trained_nn[district - 1].use(data)[0])
            results.append(np.insert(Y, 0, district))
        else:
            print('ERROR, return_all = True, or set to false and specify district number.')
            # Nothing to tabulate; stop rather than print an empty table.
            return

        df = pd.DataFrame(results, columns=['district', 'homicide', 'robbery', 'battery', 'assault',
                                            'burglary', 'theft', 'motor', 'weapons']).astype(int)
        print(df.to_string(index=False))
    
def save_pickle():
    """Train Models class with defined networks.
    Then save class to a .pickle file.

    Builds a ``Models`` object for nine districts, trains it with the
    default ``train()`` arguments and writes it to
    ``trained_models.pickle`` in the current working directory.
    """
    # Hidden-layer specification per district; district 5 uses a single
    # small layer while the others share one four-layer layout.
    networks = [
                [65, 76, 21, 49], # district 1
                [65, 76, 21, 49], # district 2
                [65, 76, 21, 49], # district 3
                [65, 76, 21, 49], # district 4
                [5], # district 5
                [65, 76, 21, 49], # district 6
                [65, 76, 21, 49], # district 7
                [65, 76, 21, 49], # district 8
                [65, 76, 21, 49], # district 9
               ]

    M = Models(len(networks), networks)
    M.train()

    # Context manager closes the file even if pickling raises.
    with open('trained_models.pickle', 'wb') as outfile:
        pickle.dump(M, outfile, pickle.HIGHEST_PROTOCOL)
    
def sampler(data, a, b):
    """Split a DataFrame into feature and target arrays by column position.

    Args:
        data: DataFrame to select columns from.
        a: Positional column indices that form the features, X.
        b: Positional column indices that form the targets, T.

    Returns:
        Tuple ``(X, T)`` of numpy arrays holding the selected columns.
    """
    feature_frame = data.iloc[:, np.r_[a]]
    target_frame = data.iloc[:, np.r_[b]]
    return np.array(feature_frame), np.array(target_frame)

def get_values(X, T, network, train_f, itr, partition = False):
    """Train one network and return its predictions, targets and error trace.

    Args:
        X: Feature array.
        T: Target array.
        network: Hidden-layer specification passed to ``nn.NeuralNetwork``.
        train_f: Training fraction; only used when ``partition`` is true.
        itr: Iteration count passed to ``nnet.train``.
        partition: When true, train on the training split from
            ``ml.partition`` and evaluate on the test split; otherwise
            train and evaluate on all of ``X``/``T``.

    Returns:
        Tuple ``(Y, T, error_trace)``. With ``partition=True`` the returned
        ``T`` is the test-split targets matching ``Y``, not the input ``T``.
    """
    if partition:
        # T is deliberately rebound to the test targets so it lines up with Y.
        fit_X, fit_T, eval_X, T = ml.partition(X, T, (train_f, 1 - train_f))
    else:
        fit_X, fit_T, eval_X = X, T, X

    nnet = nn.NeuralNetwork(fit_X.shape[1], network, fit_T.shape[1])
    nnet.train(fit_X, fit_T, itr)
    Y = nnet.use(eval_X)

    return Y, T, nnet.getErrorTrace()

def network_test(district='4'):
    """Train 20 randomly shaped networks on one district and plot their errors.

    Each network has between one and five hidden layers with sizes drawn
    without replacement from 1..99. The iteration budget starts at 1200 and
    grows by 5% after every run. The data is not partitioned: every network
    is trained on all rows after the first 70.

    Args:
        district: District number as a string, used to build the CSV path.
    """
    column_names = ['date', 'dry', 'wet', 'wind', 'humidity', 'district', 'homicide', 'robbery',
                    'battery', 'assault', 'burglary', 'theft', 'motor', 'weapons']
    data = pd.read_csv('../Output/WeeklyOutput/wc'+district+'.csv', sep=',', low_memory=False,
                       names=column_names).iloc[70:]

    # Min-max scale the four weather columns.
    weather_cols = ['dry', 'wet', 'wind', 'humidity']
    data[weather_cols] = data[weather_cols].apply(lambda col: (col - col.min()) / (col.max() - col.min()))

    X, T = sampler(data, range(1, 5), range(6, 14))

    sns.set_style("whitegrid")

    run_count = 20
    iterations = 1200
    train_f = 0.8
    error_traces = []
    layouts = []
    for _ in range(run_count):
        layout = random.sample(range(1, 100), np.random.randint(1, 6))
        _, _, trace = get_values(X, T, layout, train_f, iterations)
        error_traces.append(trace)
        layouts.append(layout)
        iterations = int(1.05 * iterations)

    # One rainbow colour per run, in run order.
    palette = cm.rainbow(np.linspace(0, 1, run_count))
    plt.figure(figsize=(18, 8))
    for trace, layout, shade in zip(error_traces, layouts, palette):
        plt.plot(trace, c=shade, label='Network '+str(layout))

    plt.xlabel('Iteration')
    plt.ylabel('Error')
    plt.legend()
    plt.show()
    
def bar_test(year=5, district='4'):
    """Plot each crime column for one 52-row slice of a district's data.

    Draws a 4x2 grid with one panel per crime column. Every panel shows the
    counts as bars, overlaid with a degree-3 polynomial fit (dashed black)
    and a degree-15 polynomial fit (solid blue).

    Args:
        year: 1-based index of the 52-row slice to plot, i.e. rows
            ``52*(year-1)`` up to ``52*year``.
        district: District number as a string, used to build the CSV path.
    """
    column_names = ['date', 'dry', 'wet', 'wind', 'humidity', 'district', 'homicide', 'robbery',
                    'battery', 'assault', 'burglary', 'theft', 'motor', 'weapons']
    crime_columns = ['homicide', 'robbery','battery', 'assault', 'burglary', 'theft', 'motor', 'weapons']

    data = pd.read_csv('../Output/WeeklyOutput/wc'+district+'.csv', sep=',', low_memory=False,
                       names=column_names)

    first_row = 52*(year - 1)
    last_row = 52*year

    plt.figure(figsize=(18,25))
    for panel, crime in enumerate(crime_columns, start=1):
        plt.subplot(4, 2, panel)

        counts = data[crime][first_row:last_row]
        positions = np.arange(len(counts))

        cubic_fit = np.poly1d(np.polyfit(positions, counts, 3))
        high_order_fit = np.poly1d(np.polyfit(positions, counts, 15))
        grid = np.linspace(0, len(counts) - 1, 50)

        plt.bar(positions, counts, color='tan')
        plt.plot(grid, cubic_fit(grid), 'k--', grid, high_order_fit(grid), 'b-', lw=2.5)
        plt.title(crime)
        # NOTE(review): the label says days, but slices are 52 rows per
        # year -- confirm the sample unit.
        plt.xlabel('days (samples)')
        plt.ylabel('num crimes')

    plt.show()


In [None]:
# IMPORT ABOVE FILE

%config InlineBackend.figure_format = 'retina'
%matplotlib inline

# Analysis

## - BLAH
## - BLAH
## - BLAH

In [None]:
# Plot the eight crime columns for the fifth 52-row slice of district 5's data.
bar_test(year=5, district='5')

In [None]:
# Train 20 randomly shaped networks on district 4 and plot their error traces.
network_test(district='4')

# Results

## - BLAH
## - BLAH
## - BLAH

# Application

## - BLAH
## - BLAH
## - BLAH

In [4]:
# Train one network per district and write the result to trained_models.pickle.
save_pickle()

In [6]:
import pickle
# Unpickling needs the Models class to be importable; the import below is
# only required when this cell runs outside the notebook that defines it.
#from final import Models

# Only load pickle files this project produced: unpickling can execute
# arbitrary code. The context manager closes the file even if loading fails.
with open('trained_models.pickle', 'rb') as fp:
    trained_models = pickle.load(fp)

In [8]:
# Example weather sample, in the order Models.use documents:
# [dry-bulb-temp, wet-bulb-temp, wind-speed, relative-humidity].
# NOTE(review): these look like raw readings, while Models.train min-max
# normalizes the weather columns by default -- confirm the inputs are on
# the scale the networks were trained on.
dry_bulb_temp = 30
wet_bulb_temp = 28
wind_speed = 5
relative_humidity = 60

data = [dry_bulb_temp, wet_bulb_temp, wind_speed, relative_humidity]
# Print one row of predicted crime counts per trained district network.
trained_models.use(data, return_all=True, district=None)

district  homicide  robbery  battery  assault  burglary  theft  motor  weapons
       1         0        4       15        5         8     53     -5       -1
       2         0       13       30        7        28    132     18       -1
       3         0        2       29        8         1     21     10        1
       4         0       11       42       15         5    217     15        1
       5         4       54      209       76        46    264     54       21
       6         3       47      223       74        77    182     47       18
       7         1       37      133       47        50    128     31        9
       8         0       11       71       28        13     71     16        5
       9         1       15      101       38        22     81     22        8


# Challenges

## - BLAH
## - BLAH
## - BLAH

# Conclusion

## - BLAH
## - BLAH
## - BLAH