In [108]:
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from scipy.stats import linregress
import os
import tensorflow as tf

In [161]:
class Model:
    def __init__(self, model_dir):
        #self.model = self.build_model(model_dir)
        self.model = self.build_model_regu(model_dir)

    #Build Model
    def build_model(self, model_dir):
        continuous_features = [tf.feature_column.numeric_column(str(k)) for k in FEATURES]
        model = tf.estimator.LinearClassifier(
            n_classes = 3,
            model_dir=model_dir,
            feature_columns= continuous_features)
        return model
        #Build Model
    def build_model_regu(self, model_dir):
        continuous_features = [tf.feature_column.numeric_column(str(k)) for k in FEATURES]
        model = tf.estimator.LinearClassifier(
            n_classes = 3,
            model_dir=model_dir,
            feature_columns= continuous_features,
            optimizer=tf.train.FtrlOptimizer(
                learning_rate=0.1,
                l1_regularization_strength=0.9,
                l2_regularization_strength=5))
        return model

    #Train Model
    def train_model(self, df_train):
        self.model.train(input_fn=get_input_fn(df_train,
                                               num_epochs=None,
                                               n_batch = 16,
                                               shuffle=False),
                         steps=128)

    #Evaluate Model
    def eval_model(self, df_test):
        self.model.evaluate(input_fn=get_input_fn(df_test,
                                                  num_epochs=None,
                                                  n_batch = 16,
                                                  shuffle=False),
                            steps=128)
    #Make Prediction with Model
    def makePrediction(self, df):
        outputDF = df.copy()
        pred_iter = self.model.predict(tf.estimator.inputs.pandas_input_fn(df, shuffle=False))
        probabilities = []
        classifier = []

        for i in pred_iter:
            probabilities.append(i['logits'])
            classifier.append(i['class_ids'])
        outputDF['logits'] = probabilities
        outputDF['class_id'] = classifier
        return outputDF
def get_input_fn(data_set, num_epochs=None, n_batch = 16, shuffle=True):
    return tf.estimator.inputs.pandas_input_fn(
        x=pd.DataFrame({str(k): data_set[k].values for k in FEATURES}),
        y = pd.Series(data_set['category'].values),
        batch_size=n_batch,
        num_epochs=num_epochs,
        shuffle=shuffle)

def monteCarloTrainingData(singular_list, numSets=10, dataPts=128):
    allsets = []
    mu = np.mean(singular_list)
    sigma = np.std(singular_list)
    #print(mu, sigma)
    for _ in range(numSets):
        gen = np.random.normal(mu, sigma, dataPts)
        allsets.append(gen)
    return allsets

def createTrainingDF(allsets, category):
    trainingDF = pd.DataFrame(columns=FEATURES)
    for index in range(len(allsets)):
        trainingDF[FEATURES[index]] = allsets[index]
    trainingDF["category"] = category
    return trainingDF

def generateDF(dilutions_dict, category, numSets=10):
    allDat = {}
    for key in dilutions_dict.keys():
        allDat[key] = monteCarloTrainingData(dilutions_dict[key])
    trainingDF = []
    for index in range(numSets):
        trainingDF.append(createTrainingDF([allDat[key][index] for key in allDat.keys()], category))
    return trainingDF


FEATURES = ["12.5x", "20x", "50x", "125x", "250x", "500x"]

METADATA = ["Date:", "Time:", "Measurement mode:", "Excitation wavelength:",
            "Emission wavelength:", "Excitation bandwidth:", "Emission bandwidth:",
            "Gain (Manual):", "Number of reads:", "FlashMode:", "Integration time:", "Lag time:",
            "Part of the plate:", "Target Temperature:", "Current Temperature:"]

class PipetteTutorial:
    def __init__(self, excelname, save_loc):
        excel = os.path.join(save_loc, excelname)
        self.df = pd.read_excel(excel, sheet_name=0)
        self.metadata = self.parseMetadata()
        self.data = self.parseData()
    def parseMetadata(self):
        df = self.df
        metadata = {}
        def parse(name):
            df2 = df.loc[df[df.columns[0]]==name].dropna(axis=1)
            return df2[df2.columns[1]].iloc[0]
        for item in METADATA:
            metadata[item] = parse(item)
        return metadata
    def parseData(self):
        data = {}
        df = self.df
        start_index = df.loc[df[df.columns[0]]=='<>'].index[0]
        df2 = df.iloc[start_index+1:]
        index = 'A'
        for row in df2.iterrows():
            df_row = list(row[1][1:])
            if "..." not in df_row:
                data[index] = df_row
                index = chr(ord(index) + 1)
        return pd.DataFrame(data)
    def dilutionLine(self, row, save_loc, df):
        Output = []
        print(df)
        plt.plot(df[row])
        plt.title(row)
        Output.append(str(row) + ": " +  str(linregress(df[row], df.index)))
        plt.savefig(os.path.join(save_loc, row+"_lineplot.png"))
        plt.close()
        return Output

In [162]:
trial2 = PipetteTutorial("trial_2_data.xlsx", "./")

In [163]:
dilutions = {}
dilutions["12.5x"] = list(trial2.data.iloc[6])
dilutions["20x"] = list(trial2.data.iloc[7])
dilutions["50x"] = list(trial2.data.iloc[8])
dilutions["125x"] = list(trial2.data.iloc[9])
dilutions["250x"] = list(trial2.data.iloc[10])
dilutions["500x"] = list(trial2.data.iloc[11])
dilutions

{'12.5x': [2972.0, 3268.0, 3241.0, 3061.0, 2151.0, 3133.0, 3100.0, 2928.0],
 '20x': [1961.0, 2064.0, 2172.0, 1975.0, 1994.0, 2024.0, 1985.0, 2113.0],
 '50x': [1164.0, 1166.0, 1202.0, 1143.0, 1119.0, 1103.0, 1087.0, 1058.0],
 '125x': [381.0, 448.0, 468.0, 483.0, 520.0, 468.0, 520.0, 496.0],
 '250x': [168.0, 283.0, 275.0, 289.0, 281.0, 287.0, 285.0, 290.0],
 '500x': [181.0, 132.0, 139.0, 146.0, 150.0, 149.0, 151.0, 160.0]}

In [164]:
lower_dilutions = {}
for key in dilutions.keys():
    lower_dilutions[key] = [x*.7 for x in dilutions[key]]
lower_dilutions

{'12.5x': [2080.4,
  2287.6,
  2268.7,
  2142.7,
  1505.6999999999998,
  2193.1,
  2170.0,
  2049.6],
 '20x': [1372.6999999999998,
  1444.8,
  1520.3999999999999,
  1382.5,
  1395.8,
  1416.8,
  1389.5,
  1479.1],
 '50x': [814.8,
  816.1999999999999,
  841.4,
  800.0999999999999,
  783.3,
  772.0999999999999,
  760.9,
  740.5999999999999],
 '125x': [266.7,
  313.59999999999997,
  327.59999999999997,
  338.09999999999997,
  364.0,
  327.59999999999997,
  364.0,
  347.2],
 '250x': [117.6,
  198.1,
  192.5,
  202.29999999999998,
  196.7,
  200.89999999999998,
  199.5,
  203.0],
 '500x': [126.69999999999999,
  92.39999999999999,
  97.3,
  102.19999999999999,
  105.0,
  104.3,
  105.69999999999999,
  112.0]}

In [165]:
greater_dilutions = {}
for key in dilutions.keys():
    greater_dilutions[key] = [x*1.3 for x in dilutions[key]]
greater_dilutions

{'12.5x': [3863.6,
  4248.400000000001,
  4213.3,
  3979.3,
  2796.3,
  4072.9,
  4030.0,
  3806.4],
 '20x': [2549.3,
  2683.2000000000003,
  2823.6,
  2567.5,
  2592.2000000000003,
  2631.2000000000003,
  2580.5,
  2746.9],
 '50x': [1513.2,
  1515.8,
  1562.6000000000001,
  1485.9,
  1454.7,
  1433.9,
  1413.1000000000001,
  1375.4],
 '125x': [495.3, 582.4, 608.4, 627.9, 676.0, 608.4, 676.0, 644.8000000000001],
 '250x': [218.4, 367.90000000000003, 357.5, 375.7, 365.3, 373.1, 370.5, 377.0],
 '500x': [235.3,
  171.6,
  180.70000000000002,
  189.8,
  195.0,
  193.70000000000002,
  196.3,
  208.0]}

In [166]:
train_lower_dil = generateDF(lower_dilutions, 0)
train_correct_dil = generateDF(dilutions, 1)
train_greater_dil = generateDF(greater_dilutions, 2)

In [167]:
model = Model("./model12")
for index in range(3):
    model.train_model(train_lower_dil[index])
    model.train_model(train_correct_dil[index])
    model.train_model(train_greater_dil[index])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './model12', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a2c03bc50>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finaliz

In [168]:
def performTest(df):
    test_data = {}
    index = 0
    for val in df["A"].iloc[:6]:
        test_data[FEATURES[index]] = [val]
        index += 1
    return test_data
model.makePrediction(pd.DataFrame(performTest(trial2.data)))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model12/model.ckpt-1152
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


Unnamed: 0,12.5x,20x,50x,125x,250x,500x,logits,class_id
0,3047.0,2144.0,1182.0,558.0,340.0,171.0,"[-237.66776, -241.07433, 178.78377]",[2]


In [169]:
checkAccuracy(model.makePrediction(train_lower_dil[4]))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model12/model.ckpt-1152
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


(0, 128, 0.0)

In [170]:
checkAccuracy(model.makePrediction(train_correct_dil[4]))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model12/model.ckpt-1152
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


(0, 128, 0.0)

In [171]:
checkAccuracy(model.makePrediction(train_greater_dil[4]))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model12/model.ckpt-1152
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


(128, 128, 1.0)

In [172]:
def checkAccuracy(df):
    correct = 0
    total = 0
    for index in df.index:
        if df["category"][index] == df["class_id"][index][0]:
            correct += 1
        total +=1
    return correct, total, correct/total

In [173]:
model.makePrediction(train_lower_dil[4])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model12/model.ckpt-1152
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


Unnamed: 0,12.5x,20x,50x,125x,250x,500x,category,logits,class_id
0,2143.051541,1376.573081,825.703275,314.624081,257.711329,76.207534,0,"[-159.12381, -162.18346, 119.91965]",[2]
1,2167.378706,1372.706900,802.416191,360.150817,212.003213,110.733925,0,"[-159.75626, -163.42535, 120.646065]",[2]
2,2031.009513,1515.166480,840.531731,339.125946,175.575614,100.072504,0,"[-159.92603, -161.70407, 119.98066]",[2]
3,1541.066391,1399.521193,795.096949,262.058286,180.634773,97.173114,0,"[-138.05388, -136.96504, 102.569695]",[2]
4,1963.300423,1440.554141,781.269596,361.233151,207.964955,105.789218,0,"[-155.41933, -157.21265, 116.71482]",[2]
5,2090.863446,1474.208478,765.787637,296.985126,218.502855,103.108106,0,"[-157.87074, -160.53993, 118.87182]",[2]
6,2095.107627,1528.052444,790.519112,318.263005,180.882805,121.926392,0,"[-160.64331, -163.13678, 120.84882]",[2]
7,2115.430660,1510.912248,766.326859,360.474241,165.368604,98.557871,0,"[-159.90256, -162.75066, 120.418144]",[2]
8,2194.746803,1392.438025,819.006548,344.112161,197.439574,105.951220,0,"[-160.59798, -164.36697, 121.27174]",[2]
9,1854.071899,1388.397260,703.098819,370.953781,225.847246,98.215469,0,"[-148.5611, -150.06375, 111.5573]",[2]


In [138]:
for row in trial2.data["A"]:
    print(row)

3047.0
2144.0
1182.0
558.0
340.0
171.0
2972.0
1961.0
1164.0
381.0
168.0
181.0
