In [50]:
import pandas as pd
import xgboost as xgb
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import math as m
from statsmodels import robust
import time
import datetime

from sklearn.base import TransformerMixin
from sklearn.decomposition import PCA
from sklearn.metrics import make_scorer, accuracy_score, roc_auc_score, f1_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import Imputer, LabelEncoder, StandardScaler, MinMaxScaler, Normalizer
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
import scipy.special

import multiprocessing as mp
from itertools import product

import warnings
warnings.filterwarnings('ignore')
# Currently getting warning : 
# label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. 
# Returning False, but in future this will result in an error. Use 'array.size > 0' to check that 
# an array is not empty.
#
# ... which was apparently fixed pre sklearn 0.19.1 but didn't make it into the build, suppressing for now...

%matplotlib inline

# Load the data
train_df = pd.read_csv('./data/train.csv', header=0)
test_df = pd.read_csv('./data/test.csv', header=0)

LABEL_COL = 'Survived'
CV_FOLDS = 10
SCORER = make_scorer(accuracy_score)
UNKNOWN_VALUE = "XX"
PERC_SURVIVED = train_df[train_df[LABEL_COL] == 1].shape[0]/train_df.shape[0]

### New Features

In [51]:
def get_prefix(input_string):
    first_period_index = str.index(input_string, ".")
    prev_space_index = input_string.rfind(" ", 0, first_period_index-1)
    return input_string[prev_space_index+1:first_period_index+1]

def get_cabin_code(cabins_string):
    if (cabins_string != "" and not pd.isnull(cabins_string)):        
        return cabins_string.split(" ")[0][0]
    else:
        return UNKNOWN_VALUE

def get_cabin_count(cabins_string):
    if (cabins_string != "" and not pd.isnull(cabins_string)):        
        return len(cabins_string.split(" ")[0])
    else:
        return 0

def get_ticket_code(ticket):
    if (ticket != "" and not pd.isnull(ticket)):        
        ticket_split = ticket.split(" ")
        if len(ticket_split) == 1:
            return UNKNOWN_VALUE
        else:
            return ticket_split[0].strip().replace(".","").replace("/","").lower()
    else:
        return UNKNOWN_VALUE
    
def get_lastname(input_string):
    return input_string[0:str.index(input_string, ",")]

# Cols which will be used as features (directly (Fare) or indirectly (Name -> prefix))
cols_to_use = ['Age', 'Fare', 'SibSp', 'Parch', 'Pclass', 'Sex', 'Embarked', 'Name', 'Cabin', 'Ticket']

# use test and train to ensure we see all possible values
train_and_test_X = train_df.append(test_df)
train_and_test_X = train_and_test_X.reset_index()

# Data is documented as wrong for a couple passengers, why not fix it
train_and_test_X.SibSp[train_and_test_X.PassengerId==280] = 0
train_and_test_X.Parch[train_and_test_X.PassengerId==280] = 2
train_and_test_X.SibSp[train_and_test_X.PassengerId==1284] = 1
train_and_test_X.Parch[train_and_test_X.PassengerId==1284] = 1

# subset
train_and_test_X = train_and_test_X[cols_to_use]

# new features
train_and_test_X['Prefix'] = train_and_test_X['Name'].apply(get_prefix)
train_and_test_X['CabinCode'] = train_and_test_X['Cabin'].apply(get_cabin_code)
train_and_test_X['CabinCount'] = train_and_test_X['Cabin'].apply(get_cabin_count)
train_and_test_X['TicketCode'] = train_and_test_X['Ticket'].apply(get_ticket_code)

# Overall Family Size, count the members plus 1 for the person in question
train_and_test_X['ParchSibSp'] = train_and_test_X[['Parch', 'SibSp']].apply(lambda r : r[0] + r[1] + 1, axis=1)

train_and_test_X['LastName'] = train_and_test_X['Name'].apply(get_lastname)

# This field forms sort of a key defining what group a person is travelling with
train_and_test_X['LastName_ParchSibSp'] = train_and_test_X[['LastName', 'ParchSibSp']].apply(lambda r : "%s_%s" % (r[0], r[1]), axis=1)
train_X = train_and_test_X[0:train_df.shape[0]]
train_X_Y = pd.concat([train_X, train_df[LABEL_COL]], axis=1)
LastName_ParchSibSp_counts = train_X_Y.groupby(['LastName_ParchSibSp']).size().reset_index(name='LastName_ParchSibSp_train_count')
LastName_ParchSibSp_survived_counts = train_X_Y[train_X_Y[LABEL_COL] == 1].groupby(['LastName_ParchSibSp']).size().reset_index(name='LastName_ParchSibSp_train_survived_count')
LastName_ParchSibSp_both_counts = LastName_ParchSibSp_counts.merge(LastName_ParchSibSp_survived_counts, on='LastName_ParchSibSp', how='left')
LastName_ParchSibSp_both_counts['LastName_ParchSibSp_train_survived_count'].fillna(0, inplace=True)
# Percent of each group who are known to survive in the train set
# idea is that this will reflect a unseen group member's survival probability
LastName_ParchSibSp_both_counts['train_surv_perc'] = LastName_ParchSibSp_both_counts['LastName_ParchSibSp_train_survived_count']/LastName_ParchSibSp_both_counts['LastName_ParchSibSp_train_count']
train_and_test_X = train_and_test_X.merge(LastName_ParchSibSp_both_counts[['LastName_ParchSibSp', 'train_surv_perc']], how='left')

# Subset to cols we'll use
LABEL_COL = 'Survived'
ID_COL = 'PassengerId'
# These two lists control what raw features will be used, and how they will be treated
numeric_cols = ['Fare', 'Age', 'CabinCount', 'ParchSibSp', 'Pclass']
categorical_cols = ['Sex', 'Prefix', 'Embarked', 'CabinCode', 'TicketCode', 'train_surv_perc']

# Subset to cols we'll use as features
train_and_test_X = train_and_test_X[numeric_cols + categorical_cols]

### Imputation

In [52]:
#
# Impute a particular column grouping by another column
# Fill in with median if numeric, most common value otherwise
#
class ContextualImputer(TransformerMixin):
    
    # constructor params:
    #  col : col to impute
    #  byCol : column within which to impute 'col'
    def __init__(self, col, byCol):
        self.col = col
        self.byCol = byCol
        
    def fit(self, X, y=None):
        self.bycolumn_unique_values = np.unique(X[self.byCol].values)
        
        if X[self.col].dtype != np.dtype('O'):
            bycolumn_fill_values = [np.median((X[pd.notnull(X[self.col]) & (X[self.byCol] == bycolumn_unique_value2)][self.col])) 
                                  for bycolumn_unique_value2 in self.bycolumn_unique_values]
        else:
            bycolumn_fill_values = [X[pd.notnull(X[self.col]) & (X[self.byCol] == bycolumn_unique_value)][self.col].value_counts().index[0]  
                                  for bycolumn_unique_value in self.bycolumn_unique_values]

        bycolumn_values_fill_zipped = np.column_stack((self.bycolumn_unique_values, bycolumn_fill_values))
        self.bycolumn_values_fill_zipped_dict = dict(bycolumn_values_fill_zipped)
        
        return self

    def transform(self, X, y=None):
        def fill(row):
            if pd.isnull(row[self.col]):
                return self.bycolumn_values_fill_zipped_dict[row[self.byCol]]
            else:
                return row[self.col]

        X[self.col] = X[self.col].fillna(X.apply(fill, axis=1))
        return X

#
# Typical imputer found around the web.  Most common value for categoricals,
# median for numerics.  
#
# We'll use it for categoricals, and above customer imputer for numerics
#
class CategImputer(TransformerMixin):

    def __init__(self):
        """Impute missing values.
        If the Series is of dtype Object, then impute with the most frequent object.
        If the Series is not of dtype Object, then impute with the mean.  
        """
    def fit(self, X, y=None):
        self.fill = pd.Series([X[c].value_counts().index[0]
                               if X[c].dtype == np.dtype('O')
                               else X[c].median() for c in X], index=X.columns)
        return self

    def transform(self, X, y=None):
        return X.fillna(self.fill)
    
#
# Imputation of missing vals
#
all_X_imputed = ContextualImputer('Age', 'Pclass').fit_transform(train_and_test_X)
all_X_imputed = ContextualImputer('Fare', 'Pclass').fit_transform(all_X_imputed)

all_X_imputed['Embarked'] = np.where(all_X_imputed['Embarked'].isnull(), 'C', all_X_imputed['Embarked'])

all_X_imputed['train_surv_perc'].fillna(PERC_SURVIVED, inplace=True)

# most common value for categoricals and medians for numerics
all_X_imputed = CategImputer().fit_transform(all_X_imputed)

# all_X_imputed[all_X_imputed.isnull().any(axis=1)].shape # double check

### Encoding

In [53]:
all_X_encoded = all_X_imputed

# Onehot encoding of cateogricals using get_dummies
# Although Pclass is a number, it's really a code and we'll treat it as such
for categorical_col in ['Pclass', 'Embarked']:
    all_X_encoded = pd.concat([pd.get_dummies(all_X_imputed[categorical_col], 
                                              prefix=categorical_col, drop_first=True), 
                               all_X_encoded], axis=1)

# can't leave categoricals numbers, Sex is binary so just map to 0/1
all_X_encoded['Sex'] = all_X_encoded['Sex'].map({'male':0, 'female': 1})

# Bin some features by survival likelihood, leave out last categ
all_X_encoded["ParchSibSp_1"] = all_X_encoded["ParchSibSp"].map(lambda s: 1 if s == 1 else 0)
all_X_encoded["ParchSibSp_23"] = all_X_encoded["ParchSibSp"].map(lambda s: 1 if (s == 2 or s == 3) else 0)
all_X_encoded["ParchSibSp_4"] = all_X_encoded["ParchSibSp"].map(lambda s: 1 if (s == 4) else 0)

# high_prob = ['Master.', Mme.', 'Ms.', 'Lady.', 'Sir.', 'Mlle.', 'Countess']
# medhigh_prob = ['Mrs.', 'Miss.']
# med_prob = ['Dr.', 'Major.', 'Col.', 'Mrs.']
# low_prob = ['Mr.']
# very_low_prob = ['Capt.', 'Don.', 'Rev.', 'Jonkheer.']
# all_X_encoded["Prefix_high"] = all_X_encoded["Prefix"].map(lambda s: 1 if s in high_prob else 0)
# all_X_encoded["Prefix_medhigh_prob"] = all_X_encoded["Prefix"].map(lambda s: 1 if s in medhigh_prob else 0)
# all_X_encoded["Prefix_med"] = all_X_encoded["Prefix"].map(lambda s: 1 if s in med_prob else 0)
# all_X_encoded["Prefix_low"] = all_X_encoded["Prefix"].map(lambda s: 1 if s in low_prob else 0)

# CabinCode
# high_prob = ['E', 'D', 'B']
# medhigh_prob = ['A', 'F']
# med_prob = ['C', 'G']
# low_prob = ['XX']
# very_low_prob = ['T']
# all_X_encoded["CabinCode_high"] = all_X_encoded["CabinCode"].map(lambda x: 1 if x in high_prob else 0)
# all_X_encoded["CabinCode_medhigh"] = all_X_encoded["CabinCode"].map(lambda x: 1 if x in medhigh_prob else 0)
# all_X_encoded["CabinCode_med"] = all_X_encoded["CabinCode"].map(lambda x: 1 if x in med_prob else 0)
# all_X_encoded["CabinCode_low"] = all_X_encoded["CabinCode"].map(lambda x: 1 if x in low_prob else 0)

# CabinCount
# high_prob = [2, 3, 4]
# med_prob = [1]
# low_prob = [0]
# all_X_encoded["CabinCount_high"] = all_X_encoded["CabinCount"].map(lambda x: 1 if x in high_prob else 0)
# all_X_encoded["CabinCount_med"] = all_X_encoded["CabinCount"].map(lambda x: 1 if x in med_prob else 0)

# Age bins
# all_X_encoded["Age_child"] = all_X_encoded["Age"].map(lambda x: 1 if x < 15 else 0)
# all_X_encoded["Age_youngadult"] = all_X_encoded["Age"].map(lambda x: 1 if (x >= 15 and x < 33) else 0)
# all_X_encoded["Age_adult"] = all_X_encoded["Age"].map(lambda x: 1 if (x >= 33 and x < 55) else 0)

# TicketCode
high_prob = ['swpp', 'sc']
med_high_prob = ['pc', 'pp', 'fcc', 'scah']
med_prob = ['stono2', 'XX', 'ca', 'scparis', 'stono', 'c', 'ppp']
low_prob = ['a5', 'soc', 'wc', 'sotonoq', 'wep']
very_low_prob = ['sca4', 'a4', 'sp', 'spo', 'fa', 'scow', 'as', 'sopp', 'fc', 'sotono2', 'casoton']
all_X_encoded["TicketCode_high"] = all_X_encoded["TicketCode"].map(lambda s: 1 if s in high_prob else 0)
all_X_encoded["TicketCode_medhigh"] = all_X_encoded["TicketCode"].map(lambda s: 1 if s in med_high_prob else 0)
all_X_encoded["TicketCode_med"] = all_X_encoded["TicketCode"].map(lambda s: 1 if s in med_prob else 0)
all_X_encoded["TicketCode_low"] = all_X_encoded["TicketCode"].map(lambda s: 1 if s in low_prob else 0)
all_X_encoded["TicketCode_very_low"] = all_X_encoded["TicketCode"].map(lambda s: 1 if s in very_low_prob else 0)

all_X_final = all_X_encoded

# Final dropping of features not to be used in classification
all_X_final.drop("Prefix", inplace=True, axis=1)
all_X_final.drop("TicketCode", inplace=True, axis=1)
all_X_final.drop("CabinCode", inplace=True, axis=1)
all_X_final.drop("CabinCount", inplace=True, axis=1)
all_X_final.drop("Embarked", inplace=True, axis=1)
all_X_final.drop("Pclass", inplace=True, axis=1)
all_X_final.drop("Age", inplace=True, axis=1)
all_X_final.drop("train_surv_perc", inplace=True, axis=1)

train_X_matrix = all_X_final[0:train_df.shape[0]].as_matrix()
train_Y_matrix = train_df[LABEL_COL]
test_X = all_X_final[all_X_final.shape[0]-test_df.shape[0]::]

## Homemade NN

In [54]:
class neuralNetwork:
    
    def __init__(self, inputnodes, hiddennodes, outputnodes, 
                 learningrate, 
                 activation_function, inverse_activation_function):
        self.inodes = inputnodes
        self.hnodes = hiddennodes
        self.onodes = outputnodes
        self.lr = learningrate
        self.wih = (np.random.normal(0.0, pow(self.hnodes, -.5), (self.hnodes, self.inodes))) # array of shape hnodes by inodes
        self.who = (np.random.normal(0.0, pow(self.onodes, -.5), (self.onodes, self.hnodes))) # array of shape hnodes by inodes
        self.activation_function = activation_function
        self.inverse_activation_function = inverse_activation_function
        
    def train(self, inputs_list, targets_list):
        targets = np.array(targets_list, ndmin=2).T
        inputs = np.array(inputs_list, ndmin=2).T
        
        hidden_inputs = np.dot(self.wih, inputs)
        hidden_outputs = self.activation_function(hidden_inputs)

        final_inputs = np.dot(self.who, hidden_outputs)
        final_ouputs = self.activation_function(final_inputs)

        output_errors = targets - final_ouputs
        hidden_errors = np.dot(self.who.T, output_errors)
        
        self.who += self.lr * np.dot((output_errors * final_ouputs * (1.0 - final_ouputs)), np.transpose(hidden_outputs))
        self.wih += self.lr * np.dot((hidden_errors * hidden_outputs * (1.0 - hidden_outputs)), np.transpose(inputs))

    def query(self, inputs_list):
        inputs = np.array(inputs_list, ndmin=2).T
        
        hidden_inputs = np.dot(self.wih, inputs)
        hidden_outputs = self.activation_function(hidden_inputs)

        final_inputs = np.dot(self.who, hidden_outputs)
        final_ouputs = self.activation_function(final_inputs)
        
        return final_ouputs
    
    #
    # Wrapper for query which uses argmax to convert return values to hard labels (0,1)
    #
    def predict(self, inputs):
        final_outputs = self.query(inputs)
        return list(map(np.argmax, list(zip(final_outputs[0], final_outputs[1]))))

In [55]:
#
# Train NN across parameter combos, return best combo based on avg k-fold metric
#
# returns - (best params (dict), best model (neuralNetwork))
# 
def cross_train_neural_net_cv(train_X_scaled, train_Y,  
                              hidden_node_count_multiplier_params, learning_rate_params, epoch_params, 
                              activation_functions,
                              n_folds=2, metric=accuracy_score, metric_function=np.mean,
                              verbose=0):
    
    num_input_nodes = train_X_scaled.shape[1]
    num_output_nodes = 2
    
    # number of nodes in the hidden layer is average of input and output 
    # multiplied by hidden_node_count_multiplier_params
    avg_nodes_input_output = int((float(num_input_nodes) + float(num_output_nodes))/2)
    num_hidden_nodes = list(map(lambda x: int(round(x[0] * x[1])), zip(hidden_node_count_multiplier_params, 
                                                                   np.full(len(hidden_node_count_multiplier_params), 
                                                                           avg_nodes_input_output))))

    best_score = -1
    best_params = None
    num_combos = len(learning_rate_params) * len(num_hidden_nodes) * len(epoch_params)
    combo_count = 0

    skf = StratifiedKFold(n_splits = n_folds)
    
    for learning_rate in learning_rate_params:
        for hidden_node_count in num_hidden_nodes:
            for epochs in epoch_params:
                for activation_function_tuple in activation_functions:

                    skf_indices = skf.split(train_X_scaled, train_Y)
                    combo_count += 1
                    if (verbose > 0) and ((combo_count % 20) == 0) :
                        print("cross_train_neural_net_holdout : processed {0} of {1} parameter combinations.".format(combo_count, num_combos))

                    fold_scores = []
                    for train_index, test_index in skf_indices:

                        train_fold_X = train_X_scaled[train_index]
                        train_fold_Y = train_Y[train_index]
                        test_fold_X = train_X_scaled[test_index]
                        test_fold_Y = train_Y[test_index]

                        trained_nn = train_nn_cv(num_input_nodes, num_output_nodes, 
                                                 hidden_node_count, learning_rate, epochs, 
                                                 activation_function_tuple[0], activation_function_tuple[1], 
                                                 train_fold_X, train_fold_Y)
                        
                        fold_scores.append(calculate_score(trained_nn, 
                                                             learning_rate, epochs, hidden_node_count, 
                                                             test_fold_X, test_fold_Y,
                                                             metric=metric, verbose=verbose))

                    param_combo_score = metric_function(fold_scores)
                    
                    if verbose > 1:
                        print("fold_scores = {0}".format(fold_scores))

                    if param_combo_score > best_score:
                        best_score = param_combo_score
                        best_params = {"hidden_node_count": hidden_node_count, 
                                       "learning_rate": learning_rate, 
                                       "epochs": epochs,
                                       "activation_functions": activation_function_tuple
                                      }
                
        # finally train all data on best param combo
        best_nn = train_nn_cv(num_input_nodes, num_output_nodes, 
                           best_params['hidden_node_count'], best_params['learning_rate'], best_params['epochs'], 
                           best_params['activation_functions'][0], best_params['activation_functions'][1],
                           train_X_scaled, train_Y)

    if verbose > 0:
        print("\nBEST SCORE : {0}\nBEST_PARAMS : {1}".format(best_score, best_params))

    return (best_params, best_nn)

#
# Train neural net with given params
#
def train_nn_cv(num_input_nodes, num_output_nodes, 
                hidden_node_count, lr, epochs, 
                activation_function, inverse_activation_function,
                train_X_scaled, train_Y):
    
    trained_nn = neuralNetwork(num_input_nodes, hidden_node_count, num_output_nodes, 
                               lr, 
                               activation_function, inverse_activation_function)

    for e in range(epochs):
        rowcount = 0
        train_combined = np.column_stack((train_X_scaled, train_Y))
        
        # train on each row
        for row in train_combined:
            targets = np.zeros(num_output_nodes) + 0.01
            targets[int(row[train_combined.shape[1]-1])] = .99
            trained_nn.train(row[0:train_combined.shape[1]-1], targets)   
            rowcount += 1
            
    return trained_nn

#
# Test a neural network for a particular param combo and metric type and return the score
# 
# args:
#   nn - trained neural net classifier
#   test_data_X - data to predict labels for and compare to actual
#   test_data_Y - actual labels
#   lr, epochs, hidden_node_count = params for nn
#
def calculate_score(nn, lr, epochs, hidden_node_count, 
                    test_data_X, test_data_Y, metric=accuracy_score, verbose=0):
    
    labels = nn.predict(test_data_X)
    
    try:
        score = metric(labels, test_data_Y)

        if verbose > 1 :
            print("calculate_score({4}) : epochs : {0}, lr : {1}, hidden_node_count : {2}, test score = {3}".format(epochs, lr, 
                                                                                                                hidden_node_count, 
                                                                                                                score, str(metric)))
        return score
    except Exception as e:
        print("Error occurred scoring : {0}".format(e))
        print("  epochs : {0}, lr : {1}, hidden_node_count : {2}, metric : {3}".format(epochs, 
                                                                                       lr, 
                                                                                       hidden_node_count, 
                                                                                       str(metric)))
        return 0

#
# Return the lower bound of the 99 percent confidence interval of the given list of scores
# Used as a comparison function in cross validation
#
def conf_int_low_bound(scores):
    mean = np.mean(scores)
    std = np.std(scores)
    return mean - 3*std

In [56]:
start_time = time.time()

# Fit scaler from ALL data
scaler = MinMaxScaler().fit(all_X_final)

# this will be multiplied by the average of the number of input and output nodes
#HIDDEN_NODE_COUNT_MULTIPLIERS = [1.0]
HIDDEN_NODE_COUNT_MULTIPLIERS = [.5, 1.0, 1.5, 2.0, 4.0, 8.0]
#LEARNING_RATES = [.01]
LEARNING_RATES = [.001, .005, .01, .1]
#EPOCHS = [10]
EPOCHS = [10, 20, 30]

# pairs of activation/inverse functions
ACTIVATION_FUNCTIONS = [(scipy.special.expit, scipy.special.logit)]

# Train and pick best model based on CV metric
train_X_scaled = scaler.transform(train_X_matrix)
best_params, best_nn = cross_train_neural_net_cv(train_X_scaled, train_Y_matrix, 
                                                 HIDDEN_NODE_COUNT_MULTIPLIERS, LEARNING_RATES, EPOCHS, ACTIVATION_FUNCTIONS, 
                                                 n_folds=10, metric=accuracy_score, metric_function=conf_int_low_bound,
                                                 verbose=1)

# apply best_nn to test data and save results
test_X_scaled = scaler.transform(test_X)
test_predictions = best_nn.predict(test_X_scaled)
nn_final_predictions_df = pd.DataFrame({'PassengerId': test_df['PassengerId'], 'Survived': test_predictions})
nn_final_predictions_df.to_csv("submission_nn.csv", index=False)

print("NN : num inputs : {0}".format(best_nn.inodes))
print("NN : CV elapsed time : {0}".format((time.time() - start_time)))
print("Last run time : {0}".format(datetime.datetime.now()))

cross_train_neural_net_holdout : processed 20 of 72 parameter combinations.
cross_train_neural_net_holdout : processed 40 of 72 parameter combinations.
cross_train_neural_net_holdout : processed 60 of 72 parameter combinations.

BEST SCORE : 0.7617161981327182
BEST_PARAMS : {'hidden_node_count': 64, 'learning_rate': 0.01, 'epochs': 10, 'activation_functions': (<ufunc 'expit'>, <ufunc 'logit'>)}
NN : num inputs : 15
NN : CV elapsed time : 390.24461007118225
Last run time : 2018-04-17 08:26:08.820887


In [57]:
all_X_final.columns

Index(['Embarked_Q', 'Embarked_S', 'Pclass_2', 'Pclass_3', 'Fare',
       'ParchSibSp', 'Sex', 'ParchSibSp_1', 'ParchSibSp_23', 'ParchSibSp_4',
       'TicketCode_high', 'TicketCode_medhigh', 'TicketCode_med',
       'TicketCode_low', 'TicketCode_very_low'],
      dtype='object')

In [63]:
np.array([[5, 78, 2, 34, 0],
[6, 79, 3, 35, 1],
[7, 80, 4, 36, 2]]).T

array([[ 5,  6,  7],
       [78, 79, 80],
       [ 2,  3,  4],
       [34, 35, 36],
       [ 0,  1,  2]])

In [65]:
odd = np.array([[1, 3], [5, 7]])
even = np.array([[2, 4], [6, 8]])

In [68]:
odd

array([[1, 3],
       [5, 7]])

In [69]:
even

array([[2, 4],
       [6, 8]])

In [66]:
odd * even

array([[ 2, 12],
       [30, 56]])

In [67]:
np.dot(odd, even)

array([[20, 28],
       [52, 76]])

In [71]:
np.array([[5, 78, 2, 34, 0],
[6, 79, 3, 35, 1],
[7, 80, 4, 36, 2]])

array([[ 5, 78,  2, 34,  0],
       [ 6, 79,  3, 35,  1],
       [ 7, 80,  4, 36,  2]])