In [1]:
import sqlite3
from pyspark.context import SparkContext
from os import path
from pyspark.ml.feature import HashingTF, IDF, Tokenizer

In [2]:
sc = SparkContext.getOrCreate()

In [3]:
alltweets = []

## Connecting to the database

In [4]:
def create_connection(database):
    try:
        conn = sqlite3.connect(database)
        return conn
    except Exception as e:
        print(e)

    return None

In [5]:
def getTweets(username): 
    sql = '''select tweet_text from tweets where user_screen_name=?'''
    database = "../twitterApp/twitter.sqlite"
    conn = create_connection(database)
    
    cur = conn.cursor()
    cur.execute(sql,(username,))
    
    tweets = cur.fetchall()
    
    usertweets = []
    
    for tweet in tweets: 
        
        usertweets.append((tweet[0]))
    
    #alltweets.extend(' '.join(usertweets))
    
    return usertweets

In [39]:
def getTrueNegatives(database):
    conn = create_connection(database)
    sql = '''select user_screen_name from search_results where isDepressed=0 ORDER BY ROWID DESC LIMIT 322'''
    cur = conn.cursor()
    cur.execute(sql)
    users = cur.fetchall()
    
    return users
    #for user in users: 
    #   user_tweets = getTweets(user)

In [7]:
def getTruePositives(database):    
    conn = create_connection(database)
    sql = '''select user_screen_name from search_results where isDepressed="True"'''
    cur = conn.cursor()
    cur.execute(sql)
    users = cur.fetchall()
    
    return users
    #for user in users: 
    #    user_tweets = getTweets(str(user))

In [8]:
from pyspark.ml.feature import HashingTF, IDF, Tokenizer

def gettfidf(usertweets): 
    temp_df = spark.createDataFrame(usertweets,['tweet'])
    tokenizer = Tokenizer(inputCol="tweet", outputCol="words")
    wordsData = tokenizer.transform(temp_df)

    hashingTF = HashingTF(inputCol="words", outputCol="rawFeatures", numFeatures=20)
    featurizedData = hashingTF.transform(wordsData)
    idf = IDF(inputCol="rawFeatures", outputCol="features")
    idfModel = idf.fit(featurizedData)
    rescaledData = idfModel.transform(featurizedData)
    
    return rescaledData.select("features")

In [9]:
test = getTruePositives("../twitterApp/twitter.sqlite")

In [10]:
alltweets = []

In [11]:
def remove_nonascii(text):
    return ''.join([i if ord(i) < 128 else ' ' for i in text])

In [14]:
for user in test: 
    t = getTweets(user[0])    
    alltweets.append(remove_nonascii(' '.join(t)))

## Generating the RDDs for users

In [40]:
trueNegatives = getTrueNegatives("../twitterApp/twitter.sqlite")
truePositives = getTruePositives("../twitterApp/twitter.sqlite")

In [42]:
alltweets = []

In [43]:
for user in trueNegatives: 
    t = getTweets(user[0]) 
    alltweets.append([user[0], 0, remove_nonascii(' '.join(t))])

for user in truePositives:
    t = getTweets(user[0])
    alltweets.append([user[0], 1, remove_nonascii(' '.join(t))]) 

In [44]:
tweetRDD = sc.parallelize(alltweets).cache()

In [48]:
tweetDF = tweetRDD.toDF(["username","label","tweet"])

In [49]:
tweetDF.printSchema()

root
 |-- username: string (nullable = true)
 |-- label: long (nullable = true)
 |-- tweet: string (nullable = true)



## Computing TF-IDF

In [52]:
from pyspark.ml.feature import HashingTF, IDF, Tokenizer

tokenizer = Tokenizer(inputCol="tweet", outputCol="words")
wordsData = tokenizer.transform(tweetDF)
hashingTF = HashingTF(inputCol="words", outputCol="rawFeatures", numFeatures=20)
featurizedData = hashingTF.transform(wordsData)
idf = IDF(inputCol="rawFeatures", outputCol="features")
idfModel = idf.fit(featurizedData)
tfidftweets = idfModel.transform(featurizedData)

In [53]:
tfidftweets.show()

+---------------+-----+--------------------+--------------------+--------------------+--------------------+
|       username|label|               tweet|               words|         rawFeatures|            features|
+---------------+-----+--------------------+--------------------+--------------------+--------------------+
|   PopoffSierra|    0|RT @jacksfilms: "...|[rt, @jacksfilms:...|(20,[0,1,2,3,4,5,...|(20,[0,1,2,3,4,5,...|
|      dhenwikan|    0|Carnival and He -...|[carnival, and, h...|(20,[0,1,2,3,4,5,...|(20,[0,1,2,3,4,5,...|
| JosephineAlice|    0|Guess I'll just r...|[guess, i'll, jus...|(20,[0,1,2,3,4,5,...|(20,[0,1,2,3,4,5,...|
|   JoseJaimes95|    0|Being able to mut...|[being, able, to,...|(20,[0,1,2,3,4,5,...|(20,[0,1,2,3,4,5,...|
|      lyriczbot|    0|I miss that happy...|[i, miss, that, h...|(20,[0,1,2,3,4,5,...|(20,[0,1,2,3,4,5,...|
|  grillmeeting1|    0|@1HkipUrdKrXXzdk ...|[@1hkipurdkrxxzdk...|(20,[0,1,2,3,4,5,...|(20,[0,1,2,3,4,5,...|
|bustercalamity1|    0|@marc

## Normalizing the vectors

In [60]:
from pyspark.ml.feature import Normalizer
from pyspark.ml.linalg import Vectors

normalizer = Normalizer(inputCol="features", outputCol="normFeatures", p=1.0)
l1NormData = normalizer.transform(tfidftweets)
print("Normalized using L^1 norm")
l1NormData.select("normFeatures").show(truncate=False)

Normalized using L^1 norm
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|normFeatures                                                                                                                                                                                                                                                                                                                                                                                                                                                                         |
+-----------------------------

## Helper function

In [66]:
from __future__ import division
def measures(tp,tn,fp,fn):
    precision = tp/(tp+fp)
    recall = tp/(tp+fn)
    specificity = tn/(tn+fp)
    accuracy = (tp+tn)/(tp+tn+fp+fn)
    f1_score = (2*tp)/((2*tp)+fp+fn)
    return precision, recall, specificity, accuracy, f1_score

def howgoodisit(result):
    true_positives = result.filter(lambda line: line[1]==0.0 and line[0]==0.0).count()
    true_negatives = result.filter(lambda line: line[1]==1.0 and line[0]==1.0).count()
    false_positives = result.filter(lambda line: line[1]==0.0 and line[0]==1.0).count()
    false_negatives = result.filter(lambda line: line[1]==1.0 and line[0]==0.0).count()
    return measures(true_positives, true_negatives, false_positives, false_negatives)

## Evaluating NormFeatures and TF-IDF features

In [61]:
from pyspark.ml import Pipeline
from pyspark.ml.classification import NaiveBayes
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.tuning import ParamGridBuilder
from pyspark.ml.tuning import CrossValidator
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.classification import DecisionTreeClassifier
from pyspark.ml.classification import MultilayerPerceptronClassifier

In [62]:
splits = l1NormData.randomSplit([0.6, 0.4])
training_df = splits[0]
test_df = splits[1]

## Logistic Regression

In [63]:
lr = (LogisticRegression()).setMaxIter(1000).setRegParam(0.01)

lr_pipeline = Pipeline(stages=[lr])

lr_paramGrid = ParamGridBuilder().addGrid(lr.regParam, [0.01, 0.1, 0.5,1.0, 2.0]).build()

lr_cv = CrossValidator(estimator=lr_pipeline, 
                    estimatorParamMaps=lr_paramGrid, 
                    evaluator=MulticlassClassificationEvaluator(), 
                    numFolds=4)

## Evaluating TF-IDF features

In [75]:
trainData = training_df.select("label", "features")
testData = test_df.select("label", "features")

In [65]:
lr_cvModel1 = lr_cv.fit(trainData)
lr_result1 = lr_cvModel1.transform(testData)

In [67]:
lr_evaluator = MulticlassClassificationEvaluator(predictionCol="prediction")
lr_evaluator.evaluate(lr_result1, {lr_evaluator.metricName: "accuracy"})

0.6327272727272727

In [71]:
result = lr_result1.select("label", "prediction").rdd.map(list)

precision, recall, specificity, accuracy, f1_score = howgoodisit(result)

print "precision: ", precision
print "recall: ", recall
print "specificity: ", specificity
print "accuracy: ", accuracy
print "f1_score: ", f1_score

precision:  0.63768115942
recall:  0.63309352518
specificity:  0.632352941176
accuracy:  0.632727272727
f1_score:  0.635379061372


## Evaluating L1 Norm features

In [76]:
L1trainData = training_df.selectExpr("label", "normFeatures as features")
L1testData = test_df.selectExpr("label", "normFeatures as features")

In [77]:
lr_cvModel2 = lr_cv.fit(L1trainData)
lr_result2 = lr_cvModel2.transform(L1testData)

In [78]:
lr_evaluator_norm = MulticlassClassificationEvaluator(predictionCol="prediction")
lr_evaluator_norm.evaluate(lr_result2, {lr_evaluator_norm.metricName: "accuracy"})

0.6436363636363637

In [79]:
result = lr_result2.select("label", "prediction").rdd.map(list)

precision, recall, specificity, accuracy, f1_score = howgoodisit(result)

print "precision: ", precision
print "recall: ", recall
print "specificity: ", specificity
print "accuracy: ", accuracy
print "f1_score: ", f1_score

precision:  0.672268907563
recall:  0.575539568345
specificity:  0.713235294118
accuracy:  0.643636363636
f1_score:  0.62015503876


## Naive Bayes

In [80]:
nb = NaiveBayes()
pipeline = Pipeline(stages=[nb])
paramGrid = ParamGridBuilder().addGrid(nb.smoothing, [0.0, 1.0]).build()


cv = CrossValidator(estimator=pipeline, 
                    estimatorParamMaps=paramGrid, 
                    evaluator=MulticlassClassificationEvaluator(), 
                    numFolds=4)

## Evaluating TF-IDF Features

In [81]:
nb_model1 = cv.fit(trainData)
nb_result = nb_model1.transform(testData)

In [82]:
nb_evaluator_1 = MulticlassClassificationEvaluator(predictionCol="prediction")
nb_evaluator_1.evaluate(nb_result, {nb_evaluator_1.metricName: "accuracy"})

0.5345454545454545

In [83]:
result = nb_result.select("label", "prediction").rdd.map(list)

precision, recall, specificity, accuracy, f1_score = howgoodisit(result)

print "precision: ", precision
print "recall: ", recall
print "specificity: ", specificity
print "accuracy: ", accuracy
print "f1_score: ", f1_score

precision:  0.526570048309
recall:  0.784172661871
specificity:  0.279411764706
accuracy:  0.534545454545
f1_score:  0.630057803468


## Evaluating L1-Norm Features

In [84]:
nb_model2 = cv.fit(L1trainData)
nb_result_2 = nb_model2.transform(L1testData)

In [85]:
nb_evaluator_2 = MulticlassClassificationEvaluator(predictionCol="prediction")
nb_evaluator_2.evaluate(nb_result_2, {nb_evaluator_2.metricName: "accuracy"})

0.5381818181818182

In [86]:
result = nb_result_2.select("label", "prediction").rdd.map(list)

precision, recall, specificity, accuracy, f1_score = howgoodisit(result)

print "precision: ", precision
print "recall: ", recall
print "specificity: ", specificity
print "accuracy: ", accuracy
print "f1_score: ", f1_score

precision:  1.0
recall:  0.0863309352518
specificity:  1.0
accuracy:  0.538181818182
f1_score:  0.158940397351


## Decision Tree 

In [87]:
dt = DecisionTreeClassifier()


dt_pipeline = Pipeline(stages=[dt])
dt_paramGrid = (ParamGridBuilder()
             .addGrid(dt.maxDepth, [1,2,6,10])
             .addGrid(dt.maxBins, [20,40,80])
             .build())

dt_cv = CrossValidator(estimator=dt_pipeline, 
                    estimatorParamMaps=dt_paramGrid, 
                    evaluator=MulticlassClassificationEvaluator(), 
                    numFolds=4)

## Evaluating TF-IDF Features

In [88]:
dt_model_1 = dt_cv.fit(trainData)
dt_result_1 = dt_model_1.transform(testData)

In [89]:
dt_evaluator_1 = MulticlassClassificationEvaluator(predictionCol="prediction")
dt_evaluator_1.evaluate(dt_result_1, {dt_evaluator_1.metricName: "accuracy"})

0.5927272727272728

In [90]:
result = dt_result_1.select("label", "prediction").rdd.map(list)

precision, recall, specificity, accuracy, f1_score = howgoodisit(result)

print "precision: ", precision
print "recall: ", recall
print "specificity: ", specificity
print "accuracy: ", accuracy
print "f1_score: ", f1_score

precision:  0.58282208589
recall:  0.68345323741
specificity:  0.5
accuracy:  0.592727272727
f1_score:  0.629139072848


## Evaluating L1-Norm features

In [91]:
dt_model_2 = dt_cv.fit(L1trainData)
dt_result_2 = dt_model_2.transform(L1testData)

In [92]:
dt_evaluator_2 = MulticlassClassificationEvaluator(predictionCol="prediction")
dt_evaluator_2.evaluate(dt_result_2, {dt_evaluator_2.metricName: "accuracy"})

0.5890909090909091

In [93]:
result = dt_result_2.select("label", "prediction").rdd.map(list)

precision, recall, specificity, accuracy, f1_score = howgoodisit(result)

print "precision: ", precision
print "recall: ", recall
print "specificity: ", specificity
print "accuracy: ", accuracy
print "f1_score: ", f1_score

precision:  0.62037037037
recall:  0.482014388489
specificity:  0.698529411765
accuracy:  0.589090909091
f1_score:  0.542510121457


## MLP 

In [94]:
layers = [20, 5, 4, 2]

mlp = MultilayerPerceptronClassifier(maxIter=100, layers=layers, blockSize=128, seed=1234)

mlp_pipeline = Pipeline(stages=[mlp])

## Evaluating TF-IDF Features

In [95]:
mlp_model_1 = mlp_pipeline.fit(trainData)
mlp_result_1 = mlp_model_1.transform(testData)

In [97]:
mlp_evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
mlp_evaluator.evaluate(mlp_result_1, {mlp_evaluator.metricName: "accuracy"})

0.5963636363636363

In [98]:
result = mlp_result_1.select("label", "prediction").rdd.map(list)

precision, recall, specificity, accuracy, f1_score = howgoodisit(result)

print "precision: ", precision
print "recall: ", recall
print "specificity: ", specificity
print "accuracy: ", accuracy
print "f1_score: ", f1_score

precision:  0.586419753086
recall:  0.68345323741
specificity:  0.507352941176
accuracy:  0.596363636364
f1_score:  0.63122923588


## Evaluating L1-Norm Features

In [99]:
mlp_model_2 = mlp_pipeline.fit(L1trainData)
mlp_result_2 = mlp_model_2.transform(L1testData)

In [100]:
mlp_evaluator_2 = MulticlassClassificationEvaluator(metricName="accuracy")
mlp_evaluator_2.evaluate(mlp_result_2, {mlp_evaluator_2.metricName: "accuracy"})

0.6727272727272727

In [101]:
result = mlp_result_2.select("label", "prediction").rdd.map(list)

precision, recall, specificity, accuracy, f1_score = howgoodisit(result)

print "precision: ", precision
print "recall: ", recall
print "specificity: ", specificity
print "accuracy: ", accuracy
print "f1_score: ", f1_score

precision:  0.757894736842
recall:  0.517985611511
specificity:  0.830882352941
accuracy:  0.672727272727
f1_score:  0.615384615385


In [111]:
from sgd import learn_coefficients
import numpy as np

## Converting the different classifier outputs to a numpy array

In [112]:
lr_result1.show()

+-----+--------------------+--------------------+--------------------+----------+
|label|            features|       rawPrediction|         probability|prediction|
+-----+--------------------+--------------------+--------------------+----------+
|    0|(20,[0,1,2,3,4,5,...|[-0.0036995367057...|[0.49907511687843...|       1.0|
|    0|(20,[0,1,2,3,4,5,...|[-0.9922911163698...|[0.27045977686482...|       1.0|
|    0|(20,[0,1,2,3,4,5,...|[0.68244596516296...|[0.66428439266346...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|[1.22774176661866...|[0.77342308623081...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|[0.56800795812552...|[0.63830339494098...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|[-1.0007345973081...|[0.26879701529034...|       1.0|
|    0|(20,[0,1,2,3,4,5,...|[1.18987142502024...|[0.76671806790702...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|[-0.2807579648069...|[0.43026796069406...|       1.0|
|    0|(20,[0,1,2,3,4,5,...|[-0.0013599346115...|[0.49966001639949...|       1.0|
|    0|(20,[0,1,

In [113]:
nb_result.show()

+-----+--------------------+--------------------+--------------------+----------+
|label|            features|       rawPrediction|         probability|prediction|
+-----+--------------------+--------------------+--------------------+----------+
|    0|(20,[0,1,2,3,4,5,...|[-718.58593357458...|[0.05490157636335...|       1.0|
|    0|(20,[0,1,2,3,4,5,...|[-1113.4349500029...|[0.87486860228217...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|[-745.78690531928...|[0.09762173690099...|       1.0|
|    0|(20,[0,1,2,3,4,5,...|[-1238.7617135250...|[0.99824930408976...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|[-1387.1212484201...|[0.99937252957515...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|[-1635.3387090240...|[0.98502525072148...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|[-1169.9186747404...|[0.93391376051773...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|[-920.56844999794...|[0.77661013211230...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|[-747.01153742449...|[0.00143805088426...|       1.0|
|    0|(20,[0,1,

In [114]:
dt_result_1.show()

+-----+--------------------+-------------+--------------------+----------+
|label|            features|rawPrediction|         probability|prediction|
+-----+--------------------+-------------+--------------------+----------+
|    0|(20,[0,1,2,3,4,5,...| [100.0,83.0]|[0.54644808743169...|       0.0|
|    0|(20,[0,1,2,3,4,5,...| [47.0,101.0]|[0.31756756756756...|       1.0|
|    0|(20,[0,1,2,3,4,5,...| [100.0,83.0]|[0.54644808743169...|       0.0|
|    0|(20,[0,1,2,3,4,5,...| [47.0,101.0]|[0.31756756756756...|       1.0|
|    0|(20,[0,1,2,3,4,5,...| [47.0,101.0]|[0.31756756756756...|       1.0|
|    0|(20,[0,1,2,3,4,5,...| [47.0,101.0]|[0.31756756756756...|       1.0|
|    0|(20,[0,1,2,3,4,5,...| [100.0,83.0]|[0.54644808743169...|       0.0|
|    0|(20,[0,1,2,3,4,5,...| [47.0,101.0]|[0.31756756756756...|       1.0|
|    0|(20,[0,1,2,3,4,5,...| [100.0,83.0]|[0.54644808743169...|       0.0|
|    0|(20,[0,1,2,3,4,5,...| [100.0,83.0]|[0.54644808743169...|       0.0|
|    0|(20,[0,1,2,3,4,5,.

In [115]:
mlp_result_1.show()

+-----+--------------------+----------+
|label|            features|prediction|
+-----+--------------------+----------+
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       1.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       1.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       1.0|
|    0|(20,[0,1,2,3,4,5,...|       1.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       0.0|
|    0|(20,[0,1,2,3,4,5,...|       1.0|
|    0|(20,[0,1,2,3,4,5,...|       1.0|
+-----+--------------------+----------+
only showing top 20 rows



## Linear Combination of Classifiers

In [126]:
label_column = lr_result1.select("label").collect()
lr_column = lr_result1.select("prediction").collect()
mlp_column = mlp_result_1.select("prediction").collect()
nb_column = nb_result.select("prediction").collect()
dt_column = dt_result_1.select("prediction").collect()

In [127]:
temp_label = [i[0] for i in label_column]
temp_lr = [i[0] for i in lr_column]
temp_mlp = [i[0] for i in mlp_column]
temp_nb = [i[0] for i in nb_column]
temp_dt = [i[0] for i in dt_column]

In [139]:
X = np.array([temp_lr,temp_mlp, temp_nb, temp_dt]).transpose()
y = np.array([temp_label]).transpose()

In [140]:
learned_weights = learn_coefficients(X, y, 0.001, 1000)

In [143]:
print "Learned Weights for(without using normalized features):"
print "Logistic Regression", learned_weights[0]
print "Multilayer Perceptron", learned_weights[1]
print "Naive Bayes", learned_weights[2]
print "Decision Tree", learned_weights[3]

Learned Weights for(without using normalized features):
Logistic Regression 0.186848353761
Multilayer Perceptron 0.288891432644
Naive Bayes 0.333243064381
Decision Tree 0.267154165881


## Combining the classifier outputs with the help of another classifier

In [154]:
#Using sklearn SVM here as Spark doesn't support SVM with RBF
from sklearn import svm

In [156]:
svm_rbf = svm.SVC()
svm_rbf.fit(X,np.ravel(y))

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [157]:
svm_rbf_predictions = svm_rbf.predict(X)

In [159]:
from sklearn.metrics import confusion_matrix
tn, fp, fn, tp = confusion_matrix(y, svm_rbf_predictions).ravel()

precision, recall, specificity, accuracy, f1_score = measures(tp,tn,fp,fn)

print "precision: ", precision
print "recall: ", recall
print "specificity: ", specificity
print "accuracy: ", accuracy
print "f1_score: ", f1_score

precision:  0.627737226277
recall:  0.632352941176
specificity:  0.63309352518
accuracy:  0.632727272727
f1_score:  0.630036630037


## combination of classifiers with logistic regression classifier

In [160]:
from sklearn import linear_model

In [161]:
meta_lr = linear_model.LogisticRegression(solver='lbfgs')
meta_lr.fit(X, y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='lbfgs', tol=0.0001,
          verbose=0, warm_start=False)

In [162]:
meta_lr_predictions = meta_lr.predict(X)

In [163]:
tn, fp, fn, tp = confusion_matrix(y, meta_lr_predictions).ravel()

precision, recall, specificity, accuracy, f1_score = measures(tp,tn,fp,fn)

print "precision: ", precision
print "recall: ", recall
print "specificity: ", specificity
print "accuracy: ", accuracy
print "f1_score: ", f1_score

precision:  0.646616541353
recall:  0.632352941176
specificity:  0.661870503597
accuracy:  0.647272727273
f1_score:  0.639405204461


In [167]:
meta_learned_weights = meta_lr.coef_

print "Learned Weights for classifiers using logistic regression as the meta-classifier:"
print "Logistic Regression", meta_learned_weights[0][0]
print "Multilayer Perceptron", meta_learned_weights[0][1]
print "Naive Bayes", meta_learned_weights[0][2]
print "Decision Tree", meta_learned_weights[0][3]

Learned Weights for classifiers using logistic regression as the meta-classifier:
Logistic Regression 0.667609742943
Multilayer Perceptron 0.545491285197
Naive Bayes 0.44523088225
Decision Tree 0.394552896063
