In [108]:
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from scipy.stats import linregress
import os
import tensorflow as tf

In [109]:
class Model:
    """Thin wrapper around a 3-class ``tf.estimator.LinearClassifier``.

    The classifier uses one numeric feature column per entry of the
    module-level ``FEATURES`` list. ``__init__`` builds the FTRL-regularised
    variant; ``build_model`` remains available for the unregularised one.
    """

    def __init__(self, model_dir):
        """Create the regularised classifier.

        Args:
            model_dir: directory handed to the estimator as ``model_dir``.
        """
        # Swap in self.build_model(model_dir) here to drop the regularisation.
        self.model = self.build_model_regu(model_dir)

    def build_model(self, model_dir):
        """Return a LinearClassifier that uses the estimator's default optimizer."""
        continuous_features = [tf.feature_column.numeric_column(str(k)) for k in FEATURES]
        model = tf.estimator.LinearClassifier(
            n_classes = 3,
            model_dir=model_dir,
            feature_columns= continuous_features)
        return model

    def build_model_regu(self, model_dir):
        """Return a LinearClassifier trained with an L1/L2-regularised FTRL optimizer."""
        continuous_features = [tf.feature_column.numeric_column(str(k)) for k in FEATURES]
        model = tf.estimator.LinearClassifier(
            n_classes = 3,
            model_dir=model_dir,
            feature_columns= continuous_features,
            optimizer=tf.train.FtrlOptimizer(
                learning_rate=0.1,
                l1_regularization_strength=0.9,
                l2_regularization_strength=5))
        return model

    def train_model(self, df_train):
        """Run 128 training steps on ``df_train``.

        Args:
            df_train: DataFrame with the ``FEATURES`` columns plus a
                'category' label column (the columns ``get_input_fn`` reads).
        """
        # num_epochs=None lets the input repeat; `steps` is what ends the run.
        self.model.train(input_fn=get_input_fn(df_train,
                                               num_epochs=None,
                                               n_batch = 16,
                                               shuffle=False),
                         steps=128)

    def eval_model(self, df_test):
        """Evaluate on ``df_test`` for 128 steps and return the result.

        Args:
            df_test: DataFrame with the ``FEATURES`` columns plus 'category'.

        Returns:
            Whatever ``Estimator.evaluate`` returns (the evaluation metrics).
            Previously this value was dropped, so callers always got None.
        """
        return self.model.evaluate(input_fn=get_input_fn(df_test,
                                                         num_epochs=None,
                                                         n_batch = 16,
                                                         shuffle=False),
                                   steps=128)

    def makePrediction(self, df):
        """Predict a class for every row of ``df``.

        Args:
            df: DataFrame containing (at least) the ``FEATURES`` columns.

        Returns:
            A copy of ``df`` with two extra columns: 'logits' (the per-class
            logits of each row) and 'class_id' (the predicted 'class_ids'
            entry of each row). ``df`` itself is not modified.
        """
        outputDF = df.copy()
        pred_iter = self.model.predict(tf.estimator.inputs.pandas_input_fn(df, shuffle=False))
        logits = []
        class_ids = []

        # Each prediction is a dict; keep only the two entries we report.
        for prediction in pred_iter:
            logits.append(prediction['logits'])
            class_ids.append(prediction['class_ids'])
        outputDF['logits'] = logits
        outputDF['class_id'] = class_ids
        return outputDF
def get_input_fn(data_set, num_epochs=None, n_batch = 16, shuffle=True):
    """Build an estimator input function from a labelled DataFrame.

    Args:
        data_set: DataFrame holding every column named in ``FEATURES`` plus
            a 'category' column used as the label.
        num_epochs: forwarded to ``pandas_input_fn``.
        n_batch: batch size forwarded to ``pandas_input_fn``.
        shuffle: forwarded to ``pandas_input_fn``.

    Returns:
        The input function produced by ``tf.estimator.inputs.pandas_input_fn``.
    """
    # Rebuild features and labels from raw values so both get a fresh index.
    feature_frame = pd.DataFrame({str(name): data_set[name].values for name in FEATURES})
    labels = pd.Series(data_set['category'].values)
    return tf.estimator.inputs.pandas_input_fn(
        x=feature_frame,
        y=labels,
        batch_size=n_batch,
        num_epochs=num_epochs,
        shuffle=shuffle)

def monteCarloTrainingData(singular_list, numSets=10, dataPts=128, seed=None):
    """Generate synthetic data sets from a normal fit of the given readings.

    The mean and standard deviation (``np.std`` default) of ``singular_list``
    parameterise a normal distribution, from which ``numSets`` independent
    arrays of ``dataPts`` samples are drawn.

    Args:
        singular_list: sequence of numeric readings to fit.
        numSets: number of arrays to generate.
        dataPts: number of samples in each array.
        seed: optional integer seed. When given, samples come from a private
            ``np.random.RandomState(seed)`` so the result is reproducible.
            When None (default) the global NumPy random state is used,
            exactly as before.

    Returns:
        A list of ``numSets`` one-dimensional numpy arrays of length ``dataPts``.
    """
    mu = np.mean(singular_list)
    sigma = np.std(singular_list)
    # The np.random module and a RandomState instance both expose `.normal`.
    rng = np.random if seed is None else np.random.RandomState(seed)
    return [rng.normal(mu, sigma, dataPts) for _ in range(numSets)]

def createTrainingDF(allsets, category):
    """Assemble one labelled training frame from per-feature sample arrays.

    Args:
        allsets: sequence of sample arrays; the array at position ``i`` is
            stored under the column ``FEATURES[i]``.
        category: label written to every row of the 'category' column.

    Returns:
        DataFrame with the ``FEATURES`` columns plus 'category'.
    """
    frame = pd.DataFrame(columns=FEATURES)
    # Arrays are matched to feature names purely by position.
    for position, samples in enumerate(allsets):
        frame[FEATURES[position]] = samples
    frame["category"] = category
    return frame

def generateDF(dilutions_dict, category, numSets=10):
    """Build ``numSets`` synthetic, labelled training frames.

    For every key of ``dilutions_dict`` a family of ``numSets`` sample arrays
    is generated with ``monteCarloTrainingData``; frame ``i`` is then built by
    ``createTrainingDF`` from the i-th array of every key.

    Args:
        dilutions_dict: mapping of dilution name -> list of readings. The
            arrays are handed to ``createTrainingDF`` in the dict's key order
            and assigned to ``FEATURES`` by position, so the keys must be in
            the same order as ``FEATURES``.
        category: class label stored in every generated frame.
        numSets: number of frames to generate.

    Returns:
        A list of ``numSets`` DataFrames.
    """
    # Forward numSets: the helper used to fall back to its own default of 10,
    # so asking for more than 10 frames raised IndexError in the loop below.
    allDat = {
        key: monteCarloTrainingData(readings, numSets=numSets)
        for key, readings in dilutions_dict.items()
    }
    return [
        createTrainingDF([sets[index] for sets in allDat.values()], category)
        for index in range(numSets)
    ]


# Names of the dilution levels. They double as the model's feature-column
# names and as the column names of every training DataFrame.
FEATURES = [
    "12.5x",
    "20x",
    "50x",
    "125x",
    "250x",
    "500x",
]

# Labels that PipetteTutorial.parseMetadata searches for in the first column
# of the spreadsheet; each one becomes a key of the metadata dict.
METADATA = [
    "Date:",
    "Time:",
    "Measurement mode:",
    "Excitation wavelength:",
    "Emission wavelength:",
    "Excitation bandwidth:",
    "Emission bandwidth:",
    "Gain (Manual):",
    "Number of reads:",
    "FlashMode:",
    "Integration time:",
    "Lag time:",
    "Part of the plate:",
    "Target Temperature:",
    "Current Temperature:",
]

class PipetteTutorial:
    """Load a plate-reader Excel export and split it into metadata and readings.

    Attributes set by ``__init__``:
        df: the raw first sheet as returned by ``pd.read_excel``.
        metadata: dict mapping every label in ``METADATA`` to its value.
        data: DataFrame of readings, one column per sheet row ('A', 'B', ...).
    """

    def __init__(self, excelname, save_loc):
        """Read ``excelname`` from the directory ``save_loc`` and parse it."""
        excel = os.path.join(save_loc, excelname)
        self.df = pd.read_excel(excel, sheet_name=0)
        self.metadata = self.parseMetadata()
        self.data = self.parseData()

    def parseMetadata(self):
        """Look up every ``METADATA`` label in the first column of the sheet.

        Returns:
            dict mapping label -> the second non-empty cell of the first row
            whose first column equals that label.

        Raises:
            IndexError: if a label is absent or its row has no value cell.
        """
        df = self.df
        first_col = df[df.columns[0]]
        metadata = {}
        for item in METADATA:
            # Drop the empty columns so the value sits right after the label.
            matched = df.loc[first_col == item].dropna(axis=1)
            metadata[item] = matched[matched.columns[1]].iloc[0]
        return metadata

    def parseData(self):
        """Extract the readings that follow the '<>' header row.

        Every sheet row after the marker (minus its first cell) becomes one
        column of the result, named 'A', 'B', 'C', ... in order. Rows that
        contain the placeholder "..." are skipped.

        Returns:
            DataFrame with one column per kept sheet row.

        Raises:
            IndexError: if the sheet has no '<>' marker in its first column.
        """
        df = self.df
        # Find the marker by position. The old code took an index *label* and
        # fed it to .iloc, which is only right for a default 0..n-1 index.
        marker_rows = np.flatnonzero((df[df.columns[0]] == '<>').values)
        start = marker_rows[0]

        data = {}
        label = 'A'
        for _, sheet_row in df.iloc[start + 1:].iterrows():
            values = list(sheet_row.iloc[1:])
            if "..." not in values:
                data[label] = values
                label = chr(ord(label) + 1)
        return pd.DataFrame(data)

    def dilutionLine(self, row, save_loc, df):
        """Plot one column of ``df``, save the figure and summarise a linear fit.

        Args:
            row: column name in ``df``; also used for the title and file name.
            save_loc: directory that receives "<row>_lineplot.png".
            df: DataFrame holding the readings.

        Returns:
            A one-element list containing "<row>: <linregress result>".
        """
        # NOTE(review): the fit uses the readings as x and the index as y,
        # unchanged from the original; confirm that direction is intended.
        fit = linregress(df[row], df.index)

        # Own figure instead of pyplot's implicit current one, closed even
        # when saving fails, so repeated calls never pile up open figures.
        fig, ax = plt.subplots()
        try:
            ax.plot(df[row])
            ax.set_title(row)
            fig.savefig(os.path.join(save_loc, row+"_lineplot.png"))
        finally:
            plt.close(fig)
        return [str(row) + ": " + str(fit)]

In [110]:
# Load and parse the plate-reader export from the current working directory.
trial2 = PipetteTutorial("trial_2_data.xlsx", "./")

In [111]:
# Rows 6-11 of the parsed data hold the readings for the six dilution levels,
# taken here in the same order as FEATURES.
dilutions = {
    name: list(trial2.data.iloc[6 + offset])
    for offset, name in enumerate(FEATURES)
}
dilutions

{'12.5x': [2972.0, 3268.0, 3241.0, 3061.0, 2151.0, 3133.0, 3100.0, 2928.0],
 '20x': [1961.0, 2064.0, 2172.0, 1975.0, 1994.0, 2024.0, 1985.0, 2113.0],
 '50x': [1164.0, 1166.0, 1202.0, 1143.0, 1119.0, 1103.0, 1087.0, 1058.0],
 '125x': [381.0, 448.0, 468.0, 483.0, 520.0, 468.0, 520.0, 496.0],
 '250x': [168.0, 283.0, 275.0, 289.0, 281.0, 287.0, 285.0, 290.0],
 '500x': [181.0, 132.0, 139.0, 146.0, 150.0, 149.0, 151.0, 160.0]}

In [151]:
# Simulated "too dilute" readings: every measurement scaled to 70 %.
# NOTE(review): the saved output below shows each reading reduced by 300
# (e.g. 2972.0 -> 2672.0) rather than scaled, so it comes from an earlier
# version of this cell; re-run the notebook to refresh it.
lower_dilutions = {
    key: [reading*.7 for reading in readings]
    for key, readings in dilutions.items()
}
lower_dilutions

{'12.5x': [2672.0, 2968.0, 2941.0, 2761.0, 1851.0, 2833.0, 2800.0, 2628.0],
 '20x': [1661.0, 1764.0, 1872.0, 1675.0, 1694.0, 1724.0, 1685.0, 1813.0],
 '50x': [864.0, 866.0, 902.0, 843.0, 819.0, 803.0, 787.0, 758.0],
 '125x': [81.0, 148.0, 168.0, 183.0, 220.0, 168.0, 220.0, 196.0],
 '250x': [-132.0, -17.0, -25.0, -11.0, -19.0, -13.0, -15.0, -10.0],
 '500x': [-119.0, -168.0, -161.0, -154.0, -150.0, -151.0, -149.0, -140.0]}

In [152]:
# Simulated "too concentrated" readings: every measurement scaled to 130 %.
# NOTE(review): the saved output below shows each reading increased by 300
# (e.g. 2972.0 -> 3272.0) rather than scaled, so it comes from an earlier
# version of this cell; re-run the notebook to refresh it.
greater_dilutions = {
    key: [reading*1.3 for reading in readings]
    for key, readings in dilutions.items()
}
greater_dilutions

{'12.5x': [3272.0, 3568.0, 3541.0, 3361.0, 2451.0, 3433.0, 3400.0, 3228.0],
 '20x': [2261.0, 2364.0, 2472.0, 2275.0, 2294.0, 2324.0, 2285.0, 2413.0],
 '50x': [1464.0, 1466.0, 1502.0, 1443.0, 1419.0, 1403.0, 1387.0, 1358.0],
 '125x': [681.0, 748.0, 768.0, 783.0, 820.0, 768.0, 820.0, 796.0],
 '250x': [468.0, 583.0, 575.0, 589.0, 581.0, 587.0, 585.0, 590.0],
 '500x': [481.0, 432.0, 439.0, 446.0, 450.0, 449.0, 451.0, 460.0]}

In [153]:
# One list of synthetic training frames per class label:
# 0 = readings too low, 1 = measured readings, 2 = readings too high.
train_lower_dil = generateDF(lower_dilutions, category=0)
train_correct_dil = generateDF(dilutions, category=1)
train_greater_dil = generateDF(greater_dilutions, category=2)

In [154]:
# Train on generated sets 0-2 of each class, cycling lower -> correct -> greater.
# NOTE(review): the saved logs below report model_dir './model11' while this
# cell uses './model12', so they were produced by an earlier version of it.
model = Model("./model12")
for index in range(3):
    for labelled_sets in (train_lower_dil, train_correct_dil, train_greater_dil):
        model.train_model(labelled_sets[index])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './model11', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a2c03b9e8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finaliz

In [155]:
def performTest(df):
    """Map the first six readings of column "A" onto the FEATURES names.

    Args:
        df: DataFrame with a column "A".

    Returns:
        dict of feature name -> one-element list holding the reading, ready
        to be turned into a single-row DataFrame.
    """
    return {
        FEATURES[position]: [reading]
        for position, reading in enumerate(df["A"].iloc[:6])
    }
# Classify the single measured sample built from column "A".
model.makePrediction(pd.DataFrame(performTest(trial2.data)))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model11/model.ckpt-4608
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


Unnamed: 0,12.5x,20x,50x,125x,250x,500x,logits,class_id
0,3047.0,2144.0,1182.0,558.0,340.0,171.0,"[-293.8187, -88.221756, -147.251]",[1]


In [156]:
# Accuracy on lower-dilution set 4, which the training loop (sets 0-2) never saw.
# NOTE(review): checkAccuracy is defined in a later cell (In [159]); on a fresh
# kernel run top to bottom this call raises NameError. Move the definition up.
checkAccuracy(model.makePrediction(train_lower_dil[4]))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model11/model.ckpt-4608
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


(128, 128, 1.0)

In [157]:
# Accuracy on correct-dilution set 4 (not used for training).
# NOTE(review): relies on checkAccuracy, whose definition appears further down.
checkAccuracy(model.makePrediction(train_correct_dil[4]))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model11/model.ckpt-4608
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


(128, 128, 1.0)

In [158]:
# Accuracy on greater-dilution set 4 (not used for training).
# NOTE(review): relies on checkAccuracy, whose definition appears further down.
checkAccuracy(model.makePrediction(train_greater_dil[4]))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model11/model.ckpt-4608
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


(128, 128, 1.0)

In [159]:
def checkAccuracy(df):
    """Compare predicted class ids with the true labels.

    Args:
        df: DataFrame with a 'category' column (true label) and a 'class_id'
            column whose entries are indexable, the predicted class being
            element 0 (the format produced by ``Model.makePrediction``).

    Returns:
        Tuple ``(correct, total, accuracy)``. For an empty frame the result
        is ``(0, 0, nan)`` instead of a ZeroDivisionError.
    """
    total = len(df)
    if total == 0:
        return 0, 0, float("nan")
    # Walk the two columns in step rather than looking rows up by index
    # label, which fails when labels repeat (e.g. concatenated frames).
    correct = sum(
        1
        for expected, predicted in zip(df["category"], df["class_id"])
        if expected == predicted[0]
    )
    return correct, total, correct/total

In [160]:
# Show the per-row logits and predicted class for lower-dilution set 4.
model.makePrediction(train_lower_dil[4])

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model11/model.ckpt-4608
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


Unnamed: 0,12.5x,20x,50x,125x,250x,500x,category,logits,class_id
0,2891.557795,1681.797086,761.787477,181.998128,-21.545832,-147.319978,0,"[-18.7386, -74.79849, -333.52127]",[0]
1,2451.507675,1819.836694,817.794798,214.563339,-54.390001,-164.862112,0,"[-4.7857723, -81.08001, -328.76935]",[0]
2,3323.181900,1856.553969,855.251761,214.409097,-64.557276,-156.631873,0,"[-12.158455, -88.06228, -385.6015]",[0]
3,2837.122400,1667.181686,879.059504,135.070374,8.734147,-134.041490,0,"[-32.003147, -71.77426, -320.71344]",[0]
4,3181.428060,1764.498617,862.661329,114.751759,-64.851051,-146.832707,0,"[-5.558275, -78.60204, -373.24814]",[0]
5,2822.603075,1679.668803,856.077995,165.700314,-30.190360,-174.348142,0,"[-7.381809, -77.418, -343.5804]",[0]
6,1979.013699,1714.264712,881.883962,164.619079,23.473763,-143.126346,0,"[-30.384464, -68.9279, -267.71216]",[0]
7,2538.836202,1724.676759,823.310743,193.145776,-25.599232,-163.801101,0,"[-12.296461, -76.810745, -321.56506]",[0]
8,2506.283244,1834.658623,833.866310,163.305528,-51.154546,-148.320716,0,"[-8.482922, -77.3778, -328.993]",[0]
9,2704.764430,1838.465484,849.308293,174.801994,-76.080212,-144.739226,0,"[-4.5875416, -81.8932, -347.20325]",[0]


In [138]:
# Print every reading stored in column "A" of the parsed data.
# NOTE(review): this cell's execution count (138) is lower than that of the
# cells above it, so it was run earlier in the session than its position suggests.
for row in trial2.data["A"]:
    print(row)

3047.0
2144.0
1182.0
558.0
340.0
171.0
2972.0
1961.0
1164.0
381.0
168.0
181.0
