In [1]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import json as js
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC


In [2]:
def get_keywords(f_key_dict):
    dicts = {}
    for lag in languages:
        dicts[lag] = {'lemmas':[]}
    for l in open(f_key_dict):
        obj = js.loads(l)
        lag = obj['language']
        if lag in languages:
            lemmas = [item['lemma'] for item in obj['tokens']]
            lemmas = filter(lambda x: len(x) > 1, lemmas)
            if len(lemmas) == 1:
                dicts[lag][lemmas[0]] = 0
            elif len(lemmas) == 2:
                dicts[lag]["_".join(lemmas)] = 0
            elif len(lemmas) == 3:
                dicts[lag]["_".join(lemmas)] = 0
    return dicts


def create_y(f, date_range):
    df_gsr = pd.DataFrame(columns=('date', 'event', 'city'))
    lines = open(f).readlines()
    for i in range(len(lines)):
        obj = js.loads(lines[i])
        if obj['location'][0] == 'Brazil':
            df_gsr.loc[i] = [pd.to_datetime(obj['eventDate'].split("T")[0], format='%Y-%m-%d') + timedelta(days=-1),
                             1,
                             obj['location'][1]]

    df_gsr_selected = df_gsr[(df_gsr.date >= min_date) & (df_gsr.date <= max_date)]
    df_gsr_selected = df_gsr_selected[df_gsr_selected['city'] != '-']
    df_grouped = df_gsr_selected.groupby(['city', 'date']).agg({'event': lambda x: 1})

    cities = pd.unique(df_gsr_selected.city.ravel())
    new_index = pd.MultiIndex.from_product([cities, date_range], names=['city', 'date'])

    df_final = df_grouped.reindex(new_index, fill_value=0)
    return df_final


In [3]:
languages = ['English', 'Portuguese', 'Spanish']
results = []
for l in languages:

    file_keys = "./CU_Keywords.2013-01-25T15-36-29"
    keywords_dict = get_keywords(file_keys)[languages[1]]
    keywords = keywords_dict.keys()
    col_names = keywords + ['date', 'city']
    types = {key: 'int' for key in keywords}
    types['date'] = 'str'
    types['city'] = 'str'
    X = pd.read_csv('features_' + l + '.csv', header=None, names=col_names)
    X = X.groupby(['city', 'date']).agg({key: np.sum for key in keywords})

    models = [LogisticRegression(penalty='l1', class_weight='balanced'),
              # SVC(class_weight="balanced"),
              ]

    for m in models:
        m.fit(X, y.event)
        expected = y.event
        predicted = m.predict(X)
        print(metrics.classification_report(expected, predicted))
        print(metrics.confusion_matrix(expected, predicted))
        precision = metrics.precision_score(expected, predicted, average='binary')
        recall = metrics.recall_score(expected, predicted, average='binary')
        f1 = metrics.f1_score(expected, predicted, average='binary')
        fpr, tpr, thresholds = metrics.roc_curve(expected, predicted, pos_label=1)
        auc = metrics.auc(fpr, tpr)
        results.append([str(m).split("(")[0] + "_" + l, precision, recall, f1, auc])

NameError: name 'y' is not defined

In [5]:
file_gsr = "./gsrAll.json"
min_date = datetime(2014, 1, 1)
max_date = datetime(2014, 12, 31)
dates = pd.date_range(min_date, max_date)
y = create_y(file_gsr, dates)

In [6]:
languages = ['English', 'Portuguese', 'Spanish']
results = []
for l in languages:

    file_keys = "./CU_Keywords.2013-01-25T15-36-29"
    keywords_dict = get_keywords(file_keys)[languages[1]]
    keywords = keywords_dict.keys()
    col_names = keywords + ['date', 'city']
    types = {key: 'int' for key in keywords}
    types['date'] = 'str'
    types['city'] = 'str'
    X = pd.read_csv('features_' + l + '.csv', header=None, names=col_names)
    X = X.groupby(['city', 'date']).agg({key: np.sum for key in keywords})

    models = [LogisticRegression(penalty='l1', class_weight='balanced'),
              # SVC(class_weight="balanced"),
              ]

    for m in models:
        m.fit(X, y.event)
        expected = y.event
        predicted = m.predict(X)
        print(metrics.classification_report(expected, predicted))
        print(metrics.confusion_matrix(expected, predicted))
        precision = metrics.precision_score(expected, predicted, average='binary')
        recall = metrics.recall_score(expected, predicted, average='binary')
        f1 = metrics.f1_score(expected, predicted, average='binary')
        fpr, tpr, thresholds = metrics.roc_curve(expected, predicted, pos_label=1)
        auc = metrics.auc(fpr, tpr)
        results.append([str(m).split("(")[0] + "_" + l, precision, recall, f1, auc])

             precision    recall  f1-score   support

        0.0       1.00      0.19      0.32      8959
        1.0       0.11      0.99      0.20       896

avg / total       0.92      0.26      0.31      9855

[[1717 7242]
 [   5  891]]
             precision    recall  f1-score   support

        0.0       1.00      0.26      0.42      8959
        1.0       0.12      1.00      0.21       896

avg / total       0.92      0.33      0.40      9855

[[2351 6608]
 [   0  896]]
             precision    recall  f1-score   support

        0.0       1.00      0.18      0.30      8959
        1.0       0.11      0.99      0.19       896

avg / total       0.92      0.25      0.29      9855

[[1577 7382]
 [   5  891]]


In [7]:
df = pd.DataFrame({'l':['a','b','c','d'], 'v':[1,2,1,2]})


In [8]:
df

Unnamed: 0,l,v
0,a,1
1,b,2
2,c,1
3,d,2


In [9]:
df['beta'] = 1

In [10]:
df

Unnamed: 0,l,v,beta
0,a,1,1
1,b,2,1
2,c,1,1
3,d,2,1


In [11]:
df.insert(0,'beta2' , 1)

In [12]:
df

Unnamed: 0,beta2,l,v,beta
0,1,a,1,1
1,1,b,2,1
2,1,c,1,1
3,1,d,2,1


In [13]:
X

Unnamed: 0_level_0,Unnamed: 1_level_0,medo,ataque,armar,emboscada,interrogatório,recrudescência,violência_de_rua,kadiweus,essencial,cooperativa,...,intimidar,guarani,vigília,perseguição,perigo,desgosto,promotor,rural,manter,conflito_territorial
city,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Acre,2014-01-01,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-02,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-03,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-04,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-05,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-06,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-07,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-08,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-09,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-10,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
X.insert(0, "beta", 1)

In [15]:
X

Unnamed: 0_level_0,Unnamed: 1_level_0,beta,medo,ataque,armar,emboscada,interrogatório,recrudescência,violência_de_rua,kadiweus,essencial,...,intimidar,guarani,vigília,perseguição,perigo,desgosto,promotor,rural,manter,conflito_territorial
city,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Acre,2014-01-01,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-02,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-03,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-04,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-05,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-06,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-07,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-08,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-09,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-10,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
X.shape()

TypeError: 'tuple' object is not callable

In [17]:
shape(X)

NameError: name 'shape' is not defined

In [18]:
X

Unnamed: 0_level_0,Unnamed: 1_level_0,beta,medo,ataque,armar,emboscada,interrogatório,recrudescência,violência_de_rua,kadiweus,essencial,...,intimidar,guarani,vigília,perseguição,perigo,desgosto,promotor,rural,manter,conflito_territorial
city,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Acre,2014-01-01,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-02,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-03,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-04,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-05,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-06,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-07,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-08,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-09,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-10,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [19]:
from numpy import *

In [20]:
X.shape

(9855, 677)

In [21]:
X.as_matrix

<bound method DataFrame.as_matrix of                        beta  medo  ataque  armar  emboscada  interrogatório  \
city      date                                                                
Acre       2014-01-01     1     0       0      0          0               0   
           2014-01-02     1     0       0      0          0               0   
           2014-01-03     1     0       0      0          0               0   
           2014-01-04     1     0       0      0          0               0   
           2014-01-05     1     0       0      0          0               0   
           2014-01-06     1     0       0      0          0               0   
           2014-01-07     1     0       0      0          0               0   
           2014-01-08     1     0       0      0          0               0   
           2014-01-09     1     0       0      0          0               0   
           2014-01-10     1     0       0      0          0               0   
           2014

In [22]:
X.reset_index()

Unnamed: 0,city,date,beta,medo,ataque,armar,emboscada,interrogatório,recrudescência,violência_de_rua,...,intimidar,guarani,vigília,perseguição,perigo,desgosto,promotor,rural,manter,conflito_territorial
0,Acre,2014-01-01,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Acre,2014-01-02,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Acre,2014-01-03,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Acre,2014-01-04,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Acre,2014-01-05,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Acre,2014-01-06,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Acre,2014-01-07,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Acre,2014-01-08,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Acre,2014-01-09,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Acre,2014-01-10,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
del X['city']

KeyError: 'city'

In [24]:
X

Unnamed: 0_level_0,Unnamed: 1_level_0,beta,medo,ataque,armar,emboscada,interrogatório,recrudescência,violência_de_rua,kadiweus,essencial,...,intimidar,guarani,vigília,perseguição,perigo,desgosto,promotor,rural,manter,conflito_territorial
city,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Acre,2014-01-01,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-02,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-03,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-04,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-05,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-06,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-07,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-08,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-09,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Acre,2014-01-10,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
X= X.reset_index()

In [26]:
X

Unnamed: 0,city,date,beta,medo,ataque,armar,emboscada,interrogatório,recrudescência,violência_de_rua,...,intimidar,guarani,vigília,perseguição,perigo,desgosto,promotor,rural,manter,conflito_territorial
0,Acre,2014-01-01,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Acre,2014-01-02,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Acre,2014-01-03,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Acre,2014-01-04,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Acre,2014-01-05,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Acre,2014-01-06,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Acre,2014-01-07,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Acre,2014-01-08,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Acre,2014-01-09,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Acre,2014-01-10,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
del X['city']

In [28]:
X

Unnamed: 0,date,beta,medo,ataque,armar,emboscada,interrogatório,recrudescência,violência_de_rua,kadiweus,...,intimidar,guarani,vigília,perseguição,perigo,desgosto,promotor,rural,manter,conflito_territorial
0,2014-01-01,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2014-01-02,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2014-01-03,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2014-01-04,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2014-01-05,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,2014-01-06,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,2014-01-07,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,2014-01-08,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,2014-01-09,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,2014-01-10,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [29]:
del X['date']

In [30]:
X

Unnamed: 0,beta,medo,ataque,armar,emboscada,interrogatório,recrudescência,violência_de_rua,kadiweus,essencial,...,intimidar,guarani,vigília,perseguição,perigo,desgosto,promotor,rural,manter,conflito_territorial
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
X[1]

KeyError: 1

In [32]:
X.as_matrix

<bound method DataFrame.as_matrix of       beta  medo  ataque  armar  emboscada  interrogatório  recrudescência  \
0        1     0       0      0          0               0               0   
1        1     0       0      0          0               0               0   
2        1     0       0      0          0               0               0   
3        1     0       0      0          0               0               0   
4        1     0       0      0          0               0               0   
5        1     0       0      0          0               0               0   
6        1     0       0      0          0               0               0   
7        1     0       0      0          0               0               0   
8        1     0       0      0          0               0               0   
9        1     0       0      0          0               0               0   
10       1     0       0      0          0               0               0   
11       1     0       0   

In [33]:
_x = X.as_matrix

In [34]:
_x

<bound method DataFrame.as_matrix of       beta  medo  ataque  armar  emboscada  interrogatório  recrudescência  \
0        1     0       0      0          0               0               0   
1        1     0       0      0          0               0               0   
2        1     0       0      0          0               0               0   
3        1     0       0      0          0               0               0   
4        1     0       0      0          0               0               0   
5        1     0       0      0          0               0               0   
6        1     0       0      0          0               0               0   
7        1     0       0      0          0               0               0   
8        1     0       0      0          0               0               0   
9        1     0       0      0          0               0               0   
10       1     0       0      0          0               0               0   
11       1     0       0   

In [35]:
_x[1]

TypeError: 'instancemethod' object has no attribute '__getitem__'

In [36]:
X.as_matrix()

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ..., 
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [37]:
X

Unnamed: 0,beta,medo,ataque,armar,emboscada,interrogatório,recrudescência,violência_de_rua,kadiweus,essencial,...,intimidar,guarani,vigília,perseguição,perigo,desgosto,promotor,rural,manter,conflito_territorial
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [38]:
X = X.as_matrix()

In [39]:
X

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ..., 
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [40]:
X[1]

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0,

In [41]:
shape(X[1])

(677L,)

In [42]:
type(shape(X[1]))

tuple

In [43]:
type(X[1])

numpy.ndarray

In [44]:
ones((5, 1))

array([[ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.]])

In [45]:
ones((5, ))

array([ 1.,  1.,  1.,  1.,  1.])

In [46]:
a= ones((5, 1))

In [47]:
a

array([[ 1.],
       [ 1.],
       [ 1.],
       [ 1.],
       [ 1.]])

In [48]:
shape(a)

(5L, 1L)

In [49]:
b = array([1,1,1,1,1])

In [50]:
b

array([1, 1, 1, 1, 1])

In [51]:
shape(b)

(5L,)

In [52]:
a * b

array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

In [53]:
b * a

array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

In [54]:
dot(a,b)

ValueError: shapes (5,1) and (5,) not aligned: 1 (dim 1) != 5 (dim 0)

In [55]:
dot(b, a)

array([ 5.])