In [280]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as sps
plt.style.use('ggplot')
from sympy import *
import copy
import re
from matplotlib import cm
from scipy import sparse
import scipy.sparse as sprs
from sklearn.svm import SVC
from scipy.sparse.csr import csr_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import make_scorer
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import Lasso, LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn import cross_validation
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, RandomForestClassifier
from joblib import Parallel, delayed
import multiprocessing
from sklearn.model_selection import GridSearchCV
%matplotlib inline

In [4]:
def get_good_train(name):
    id = 0
    ids = []
    word = []
    first_form = []
    part_first = []
    for line in open(name):
        line = re.split('[\,,+]',line[:-1])
        if line[0] == 'Id':
            continue
        for i in range(2, len(line),2):
            ids.append(id)
            word.append(line[1])
            first_form.append(line[i])
            part_first.append(line[i+1])
            id += 1
    frame = pd.DataFrame()
    frame['X'] = word
    frame['Y'] = first_form
    frame['P'] = part_first
    return frame

In [5]:
row_train=get_good_train('task2_lemmas_train')

In [271]:
row_train.head()

Unnamed: 0,X,Y,P,shifts
0,vergognerete,vergognare,V,5
1,amnistiavate,amnistiare,V,4
2,menomazione,menomazione,N,0
3,sfaldavamo,sfaldare,V,4
4,sfodererei,sfoderare,V,4


In [9]:
row_test = pd.read_csv('task2_lemmas_test')
row_test.columns = ['id', 'X']
row_test = row_test.drop('id',axis=1)

In [10]:
row_test.head()

Unnamed: 0,X
0,gettonan
1,incidentali
2,involtino
3,lievi
4,comunistizzasse


In [11]:
np.unique(row_train['P'])

array(['A', 'N', 'V'], dtype=object)

In [320]:
def add_count_vectorizer_features(train, test):
    vectorizer = CountVectorizer(min_df=2, max_df=.9,
                             max_features=None,
                             ngram_range=(3,10),
                             lowercase=True,
                             analyzer='char_wb', 
                             binary=False)
    TTrainM = vectorizer.fit_transform(train[0])
    TTestM = vectorizer.transform(test[0])
    return TTrainM, TTestM

def get_F_for_predict_part(train, test):
    CV_F_train, CV_F_test = add_count_vectorizer_features(train, test)
    chars = {'A':0,'N':1,'V':2}
    _P = np.array([chars[c] for c in train]).reshape(len(train['id']),1)
    return CV_F_train, CV_F_test, _P

In [13]:
def best_common_substr(s1, s2):
   m = [[0] * (1 + len(s2)) for i in range(1 + len(s1))]
   longest, x_longest = 0, 0
   for x in range(1, 1 + len(s1)):
       for y in range(1, 1 + len(s2)):
           if s1[x - 1] == s2[y - 1]:
               m[x][y] = m[x - 1][y - 1] + 1
               if m[x][y] > longest:
                   longest = m[x][y]
                   x_longest = x
           else:
               m[x][y] = 0
   return s1[x_longest - longest: x_longest]

def best_common_suff(s1, s2):
    pref = ''
    for i in range(min(len(s1), len(s2))):
        if s1[len(s1) - 1 - i] != s2[len(s2) - 1 - i]:
            break
        pref += s1[len(s1) - 1 - i]
    return pref

In [243]:
def best_common_prefix(s1, s2):
    pref = ''
    for i in range(min(len(s1), len(s2))):
        if s1[i] != s2[i]:
            break
        pref += s1[i]
    return pref

def extract_all_pref(train):
    prefs = dict()
    shifts = []
    for i in range(len(train)):
        w = train['X'][i]
        f = train['Y'][i]
        pref = best_common_prefix(w, f)
        part = train['P'][i]
        if pref not in prefs:
            prefs[pref] = dict()
        if part not in prefs[pref]:
            prefs[pref][part] = []
        prefs[pref][part].append(f)
        shifts.append(len(w) - len(pref))
    return prefs, shifts
                   
def extract_all_suff(train):
    prefs = dict()
    for i in range(len(train)):
        w = train['X'][i]
        f = train['Y'][i]
        pref = best_common_prefix(w, f)
        suff1 = w[len(pref):]
        suff2 = f[len(pref):]
        if len(suff1) <= 1:
            continue
        if suff1 in prefs:
            if len(suff2) > len(prefs[suff1][0]):
                continue
        prefs[suff1] = (suff2,train['P'][i])
    return prefs

In [244]:
%%time
prefs,shifts =  extract_all_pref(row_train)

CPU times: user 15.5 s, sys: 332 ms, total: 15.8 s
Wall time: 17.4 s


In [245]:
def add_column(frame, data, names):
    new_fr = pd.DataFrame(data, columns=names)
    return pd.concat((frame, new_fr),axis=1)

In [246]:
row_train = add_column(row_train, shifts, ['shifts'])

In [247]:
row_train.head()

Unnamed: 0,X,Y,P,shifts
0,vergognerete,vergognare,V,5
1,amnistiavate,amnistiare,V,4
2,menomazione,menomazione,N,0
3,sfaldavamo,sfaldare,V,4
4,sfodererei,sfoderare,V,4


In [16]:
def predict_with_substr(test, prefs):
    bad = 0
    pred  = []
    for s in test['X']:
        er = 1
        found = None
        best = None
        for i in range(len(s)):
                for j in range(i+1,len(s)):
                    cur = s[i:j]
                    if cur in substrs:
                        if found is None or len(cur) > found:
                            found = len(cur)
                            best = cur
                            er = 0
        if best is not None:
            pred.append(substrs[best][0] + '+' + substrs[best][1])
                            
        if er == 1:
            bad += 1
            pred.append(s + '+V')
    return pred,bad

In [17]:
def predict_with_suff(test, prefs):
    bad = 0
    pred  = []
    for s in test['X']:
        er = 1
        for i in range(len(s)-1,-1,-1):
            cur = s[i:]
            if cur in suffs:
                pred.append(s[:-len(cur)] + suffs[cur][0] + '+' + suffs[cur][1])
                er = 0
                break
                                    
        if er == 1:
            bad += 1
            pred.append(s + '+V')
    return pred,bad

In [321]:
def predict_with_prefs(test, prefs):
    bad = 0
    pred  = []
    for s in test['X']:
        er = 1
        for i in range(len(s),0,-1):
            cur = s[:i]
            if cur in prefs:
                pred.append(prefs[cur])
                er = 0
                break
                                    
        if er == 1:
            bad += 1
            pred.append(None)
    return pred,bad

In [147]:
i = 0 
for v in prefs:
    print(prefs[v])
    i += 1
    if i == 20:
        break

{'V': {'vergognare'}, 'N': {'vergogna'}}
{'V': {'amnistiare'}, 'A': {'amnistiare'}}
{'N': {'menomazione'}}
{'V': {'sfaldare'}}
{'V': {'sfoderare'}}
{'V': {'ascondere'}}
{'V': {'edificare'}}
{'V': {'maschiare'}, 'A': {'maschio'}}
{'V': {'transennare'}, 'N': {'transenna'}}
{'V': {'computare'}, 'A': {'computare'}}
{'V': {'accudire'}}
{'V': {'dirompere'}}
{'V': {'intercollegare'}}
{'V': {'integrare'}}
{'V': {'sbramare'}}
{'V': {'stravaccare'}}
{'V': {'oltrepassare'}}
{'V': {'cauzionare'}}
{'V': {'scarrozzare'}, 'A': {'scarrozzare'}}
{'V': {'intossicare'}}


In [190]:
many = [prefs[v] for v in prefs if prefs[v] is not None 
        and 
        (('V' in prefs[v] and prefs[v]['V'] is not None and len(prefs[v]['V']) > 1) or
         ('A' in prefs[v] and prefs[v]['A'] is not None and len(prefs[v]['A']) > 1) or
         ('N'in prefs[v] and prefs[v]['N'] is not None and len(prefs[v]['N']) > 1))
         ]
print(len(many), len(many)/len(y_pred), many)

139 0.004686288392164796 [{'V': {'soffriggere', 'soffrire'}}, {'V': {'corrompere', 'correre'}}, {'V': {'accorgere', 'accorare'}}, {'V': {'infiltrarsi', 'infiltrare'}}, {'N': {'gobba', 'gobbo'}}, {'V': {'creare', 'crescere'}}, {'A': {'indicatore', 'indicato'}}, {'N': {'cappella', 'cappello'}}, {'V': {'falcare', 'falciare'}}, {'V': {'rischiarare', 'rischiare'}}, {'V': {'spiare'}, 'A': {'spiare', 'spia'}}, {'V': {'astrologare'}, 'N': {'astrologo', 'astrologa'}}, {'N': {'infermiera', 'infermiere'}}, {'V': {'potere', 'potare'}}, {'V': {'protestare'}, 'N': {'protesta', 'protesto'}}, {'V': {'rimanere', 'rimare'}}, {'V': {'scuotere', 'essere', 'percuotere', 'consistere', 'risedere', 'uscire', 'esistere', 'desistere'}, 'A': {'ripercuotere', 'buono', 'risedere', 'grande'}}, {'V': {'corrodere', 'corrompere'}}, {'V': {'pendolare'}, 'N': {'pendola', 'pendolo'}}, {'A': {'scarno', 'scarnire'}}, {'A': {'penultimo'}, 'N': {'penultimo', 'penultima'}}, {'N': {'depressa', 'depresso'}}, {'V': {'mordere', '

CPU times: user 72.5 ms, sys: 3.16 ms, total: 75.6 ms
Wall time: 76.4 ms


In [173]:
mult

6042

In [174]:
print(mult, bad, len(y_pred), (len(y_pred) - mult - bad)/len(y_pred), mult/len(y_pred), bad/len(y_pred))

6042 329 29661 0.7852061629749503 0.2037018306867604 0.011092006338289336


In [179]:
y_pred[1000:1020]

[{'A': {'porgere'}},
 {'V': {'espungere'}},
 {'V': {'arcaizzare'}},
 {'N': {'settore'}},
 {'A': {'centesimo'}, 'N': {'centesimo'}},
 None,
 {'V': {'canterellare'}},
 {'A': {'rinunciatario'}},
 {'V': {'presupporre'}},
 {'V': {'insorgere'}},
 {'V': {'seghettare'}},
 {'V': {'calamitare'}},
 {'A': {'sfamare'}, 'V': {'sfamare'}},
 {'V': {'sobbarcare'}},
 {'A': {'grippare'}, 'V': {'grippare'}},
 {'N': {'schematizzazione'}},
 {'A': {'mobiliare'}},
 {'V': {'sgombrare'}},
 {'N': {'accento'}, 'V': {'accentare'}},
 {'V': {'respingere'}}]

In [29]:
mult_eq = [ y for y in y_pred 
       if y is not None and len(y) == 2 and list(y)[0][0] == list(y)[1][0]]
print(len(mult_eq), len(mult_eq)/len(y_pred), mult_eq)

4367 0.14723036984592563 [{('comunistizzare', 'V'), ('comunistizzare', 'A')}, {('imbrodare', 'V'), ('imbrodare', 'A')}, {('elettrizzare', 'A'), ('elettrizzare', 'V')}, {('distinguere', 'V'), ('distinguere', 'A')}, {('fertilizzare', 'A'), ('fertilizzare', 'V')}, {('decantare', 'V'), ('decantare', 'A')}, {('scapitare', 'V'), ('scapitare', 'A')}, {('sgomentare', 'A'), ('sgomentare', 'V')}, {('standardizzare', 'A'), ('standardizzare', 'V')}, {('decadente', 'A'), ('decadente', 'N')}, {('arricciare', 'A'), ('arricciare', 'V')}, {('posizionare', 'A'), ('posizionare', 'V')}, {('bisbocciare', 'V'), ('bisbocciare', 'A')}, {('rimembrare', 'A'), ('rimembrare', 'V')}, {('controfirmare', 'V'), ('controfirmare', 'A')}, {('incatramare', 'V'), ('incatramare', 'A')}, {('affascinare', 'V'), ('affascinare', 'A')}, {('distinguere', 'V'), ('distinguere', 'A')}, {('espungere', 'A'), ('espungere', 'V')}, {('assicurare', 'A'), ('assicurare', 'V')}, {('speronare', 'V'), ('speronare', 'A')}, {('rubacchiare', 'A'

In [30]:
pairs_in_ord = [y[-1][1] for y in y_pred_ords 
                if y is not None and 
                len(set(y)) > 1
               ]

In [31]:
print(len(pairs_in_ord), len(pairs_in_ord)/len(y_pred), pairs_in_ord)

6249 0.21068069181753818 ['V', 'V', 'V', 'A', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'N', 'V', 'V', 'A', 'V', 'V', 'V', 'V', 'N', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'A', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'A', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'A', 'V', 'V', 'V', 'A', 'V', 'A', 'V', 'V', 'A', 'N', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'N', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'N', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'A', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'A', 'V', 'V', 'V', 'V', 'V', 'V', 'N', 'V', 'V', 'V', 'V', 'V', 'V', 'A', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'A', 'V', 'V', 'V', 'V', 'V', 'V', 'N', 'V', 'V', 'V', 'V', 'V', 'A', 'V', 'N', 'V', 'V', 'V', 'V', 'A', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'N', 'V', 'V', 'V', 'A', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V', 'V',

In [32]:
len([y for y in pairs_in_ord if y == 'V'])

5453

In [322]:
def add_column(_frame, data, names):
    new_fr = pd.DataFrame(data, columns=names)
    return pd.concat((_frame, new_fr),axis=1)

def add_custom(frame, f, name):
    data = np.array([f(x) for x in frame['X']]).reshape((len(frame['X']),1))
    return add_column(frame, data, [name])

def normalize(frame, name):
    frame[name] = (np.array(frame[name]) - np.mean(frame[name]))/np.std(frame[name])
    return frame

def add_count_vectorizer_features(train, test):
    vectorizer = CountVectorizer(min_df=2, max_df=.9,
                             max_features=None,
                             ngram_range=(2, 10),  # 5 --> 10
                             lowercase=True,
                             analyzer='char_wb', 
                             binary=False)
    TTrainM = vectorizer.fit_transform(train['X'])
    TTestM = vectorizer.transform(test['X'])
    return TTrainM, TTestM

def add_custom_features(train, test):
    _Train = normalize(add_custom(train, (lambda s: len(s)),'_len'),'_len')
    _Test = normalize(add_custom(test, (lambda s: len(s)),'_len'),'_len')
    _Train = _Train.drop(['X','Y','P','shifts'],axis=1)
    _Test = _Test.drop(['X'],axis=1)
    if 'Y' in _Test.columns and 'P' in _Test.columns and 'shifts' in _Test.columns:
        _Test = _Test.drop(['Y','P','shifts'],axis=1)
    return np.array(_Train,dtype=float), np.array(_Test,dtype=float)

def gen_features_frames(train, test):
    train_vect, test_vect = add_count_vectorizer_features(train, test)
    train_my_f, test_my_f = add_custom_features(train, test)
    Train = sprs.hstack([train_vect, sprs.coo_matrix(train_my_f)])
    Test = sprs.hstack([test_vect, sprs.coo_matrix(test_my_f)])
    return Train, Test

def convert_classes(train_ans):
    d = {'A':0, 'N':1, 'V':2}
    return np.array([d[c] for c in train_ans])

def revert_classes(train_ans):
    d = ['A','N','V']
    return np.array([d[c] for c in train_ans])

In [273]:
from joblib import Parallel, delayed
import multiprocessing

In [323]:
def do(train_indices, test_indices, Train, Y,clf, y_name='P'):
    Train_train = Train.loc[train_indices]
    Train_test = Train.loc[test_indices]
    Train_train.index = np.arange(len(train_indices))
    Train_test.index = np.arange(len(test_indices))
    x_train, x_test = gen_features_frames(Train_train, Train_test)
    y_train = Y[train_indices]
    y_test = Y[test_indices]
    clf.fit(sprs.coo_matrix(x_train),y_train)
    y_pred = clf.predict(sprs.coo_matrix(x_test))
    return accuracy_score(y_test, y_pred)


def cross_val(Train, clf, y_name='P', folds=4, jobs=1):
    score = []
    Y = Train[y_name]
    if y_name == 'P':
        Y = convert_classes(Y)
    score = Parallel(n_jobs=jobs)(delayed(do)(train_indices, test_indices, Train, Y,clf, y_name)
                               for train_indices, test_indices 
                               in cross_validation.KFold(len(Y), n_folds = folds))
    return [np.mean(score), np.std(score)]

def pred_parts(Train, Test, clf, y_name = 'P'):
    Y = Train[y_name]
    if y_name == 'P':
        Y = convert_classes(Y)
    train, test = gen_features_frames(Train, Test)
    clf.fit(train, Y)
    y_pred = clf.predict(test)
    return y_pred

In [324]:
clfs = [
        LogisticRegression(penalty='l1',
                    max_iter=1000,
                    C=5,
                    class_weight='balanced',
                    n_jobs=4,
                    verbose=True),
]

In [282]:
%%time
ans = []
for clf in clfs:
    ans.append(cross_val(row_train, clf,'shifts', 4,4))
    print(ans[-1])



[0.91065950169257304, 0.0012740327875784417]
CPU times: user 668 ms, sys: 386 ms, total: 1.05 s
Wall time: 1min 58s


In [325]:
%%time
classes = pred_parts(row_train, row_test, clfs[0])

[LibLinear]CPU times: user 1min 19s, sys: 1.73 s, total: 1min 21s
Wall time: 1min 22s


In [286]:
%%time
offsets = pred_parts(row_train, row_test, clfs[0], 'shifts')

[LibLinear]CPU times: user 2min 53s, sys: 4.86 s, total: 2min 58s
Wall time: 3min 10s


In [294]:
offsets

array([1, 1, 3, ..., 1, 4, 7])

In [None]:
balanced,l1, binary, C=10 -[0.95773262288395533, 0.001451525583466166]
multiclass,l2,newton,C=10,iter-100 0.96
multiclass,l2,lbgfs,C=10,iter-200 0.94

In [314]:
def get_pred_with_prefs_and_classes(y_pred, words, classes):
    ans = []
    i = 0
    for y in y_pred:
        if y is not None:
            if classes[i] in y:
                ans.append(sps.mode(y[classes[i]])[0][0] + '+' + classes[i])
            else:
                if 'V' in y:
                    ans.append(sps.mode(y['V'])[0][0] + '+V')
                else:
                    ans.append(sps.mode(y[[k for k in y][0]])[0][0] + '+' + [k for k in y][0])
        else:
            ans.append(words[i] + '+V')
        i += 1
    return ans

In [326]:
%%time
y_pred,bad = predict_with_prefs(row_test, prefs) 
mult = len([y for y in y_pred if y is not None and len(y) > 1])

CPU times: user 72.4 ms, sys: 2.1 ms, total: 74.5 ms
Wall time: 74 ms


In [327]:
y_pred[0:10]

[{'V': ['gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare',
   'gettonare']},
 {'N': ['incidente']},
 {'V': ['involtare',
   'involtare',
   'involtare',
   'involtare',
   'involtare',
   'involtare',
   'involtare',
   'involtare',
   'involtare',
   'involtare',
   'involtare',
   'involtare',
   'involtare',
   'involtare',
   'involtare']},
 None,
 {'A': ['comunistizzare', 'comunistizzare'],
  'V': ['comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare',
   'comunistizzare']},
 {'V': ['vidimare',
   'vidimare',
   'v

In [328]:
%%time
y_pred_full = get_pred_with_prefs_and_classes(y_pred, row_test['X'], classes) 



CPU times: user 2.12 s, sys: 20.3 ms, total: 2.14 s
Wall time: 2.16 s


In [329]:
y_pred_full[0:20]

['gettonare+V',
 'incidente+N',
 'involtare+V',
 'lievi+V',
 'comunistizzare+V',
 'vidimare+V',
 'imbrodare+V',
 'strillare+V',
 'cifrare+V',
 'compassare+V',
 'cucire+V',
 'snobbare+V',
 'tessere+V',
 'coagulare+V',
 'somatizzare+V',
 'impoverire+A',
 'smungere+V',
 'abbuffare+V',
 'meravigliare+V',
 'risucchiare+V']

In [298]:
def save_ans(_y_test, filename):
    print(_y_test)
    names = np.arange(len(_y_test)) + 1
    ans = pd.DataFrame(data=np.matrix(_y_test).T, columns=['Category'])
    ans = pd.concat((pd.DataFrame(names,columns=['Id']),ans),axis=1)
    ans.to_csv(filename,index=None)

In [None]:
save_ans(y_pred_full, 'contest2_best_pref_dict_with_predict_class_more')

['gettonare+V', 'incidente+N', 'involtare+V', 'lievi+V', 'comunistizzare+V', 'vidimare+V', 'imbrodare+V', 'strillare+V', 'cifrare+V', 'compassare+V', 'cucire+V', 'snobbare+V', 'tessere+V', 'coagulare+V', 'somatizzare+V', 'impoverire+A', 'smungere+V', 'abbuffare+V', 'meravigliare+V', 'risucchiare+V', 'sesquipedale+A', 'timido+A', 'nauseare+V', 'ingozzare+V', 'stimare+V', 'relazionare+V', 'sorridere+V', 'illuminare+V', 'concitare+V', 'dissecare+V', 'deregolamentare+V', 'elettrizzare+V', 'ripiovere+V', 'ottimizzare+V', 'accordare+V', 'barrare+V', 'arguire+V', 'sfuggire+V', 'ritorcere+V', 'rifiatare+V', 'sgobbare+V', 'adoprare+V', 'abbigliare+V', 'complimentare+V', 'plurale+N', 'soprassedere+V', 'affermare+V', 'frusciare+V', 'amareggiare+V', 'autoalimentare+V', 'escutere+V', 'secco+A', 'talentare+V', 'derapare+V', 'distinguere+V', 'frizzare+V', 'sincopare+V', 'tentacolare+V', 'scomporre+A', 'micro+N', 'disinnestare+V', 'foracchiare+V', 'fertilizzare+V', 'smaniare+V', 'infischiare+V', 'semi



'b'

In [242]:
a = [['1','0'],['1','0'],['1','1']]
sps.mode(a,axis=0)[0][0]



'0'

Unnamed: 0,id,X,Y,P
0,0,vergognerete,vergognare,V
2,2,menomazione,menomazione,N


In [38]:
m = sprs.coo_matrix([[0,1],[0,0]])
print(m.toarray()[[0,1]])

[[0 1]
 [0 0]]


In [157]:
a = pd.DataFrame([[1,3],[2,2],[3,1]])
a.head()

Unnamed: 0,0,1
0,1,3
1,2,2
2,3,1


In [220]:
a = [1,2,3,2]
sps.mode(a)[0][0]

2