In [6]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas
from scipy.optimize import curve_fit 
from numpy import log, exp, linspace, sqrt, diag
from sklearn.linear_model import LinearRegression

df = pandas.read_csv( 'data/assay_data_clean.csv' )
temp_key = pandas.read_csv( 'data/temp_key.csv' )
temp_key.index = temp_key.Well
df['temp'] = df.well.str[0].map( temp_key.Kelvin.to_dict() ) 
df.dropna( inplace=True )

# logistic equation 
def f(x, x0, k): 
    return 1/(1+exp(-k*(x-x0)))

# util function 
def fit_and_make_plot( df ):
    name = df.mutant.unique()[0]
    df.rate = df.rate / df.rate.max()
    
    # linear fit 
    reg = LinearRegression()
    reg.fit( df.temp.reshape(-1, 1), df.rate )
    slope = reg.coef_[0]
    
    # try fitting to logistic eqn 
    try:
        p0 = ( df.temp.mean(), slope )
        popt, pcov = curve_fit( f, df.temp, df.rate, p0=p0 )
        perr = sqrt( diag( pcov ) )
        
        # plot
        plt.figure( figsize=(2,2) )
        plt.scatter( df.temp, df.rate, alpha=0.7, color='black', marker='.' )
        x_space = linspace( df.temp.min(), df.temp.max(), 100 )
        plt.plot( x_space, f( x_space, *popt ), alpha=0.5, color='purple' )
        plt.xlabel( 'T $(K)$' )
        plt.ylabel( '$min^{-1}$')
        plt.xticks( [ 303, 313, 323 ] )
        plt.yticks( [ 0, 0.25, 0.5, 0.75, 1 ] )
        plt.title( name )
        plt.savefig( 'plots/%s.pdf' % name, format='pdf' )
        plt.close()


        return pandas.Series( dict( zip( ( 'tm', 'k' ) , popt ) ) ) 
    except Exception as e:
        print e
        return pandas.Series()        
    
    
fits = df.groupby( by='mutant' ).apply( fit_and_make_plot )
fits.sample(10)

Unnamed: 0_level_0,k,tm
mutant,Unnamed: 1_level_1,Unnamed: 2_level_1
Y295A,0.351082,325.548702
V55G,-1.005693,312.356298
E17S,-0.792363,312.574283
W120F,-0.462998,314.660527
Q316R,-0.591794,313.257592
M261D,-6.312323,303.477102
A356A,-1.124698,313.038266
A195S,-0.9063,312.232598
W328C,-0.898275,312.330769
H122E,-0.120498,293.882455


In [7]:
fits['native'] = fits.index.str[0]
fits['designed'] = fits.index.str[-1]
fits['position'] = fits.index.str[1:-1].astype( int )
fits['mutant'] = fits.index
fits.head()

Unnamed: 0_level_0,k,tm,native,designed,position,mutant
mutant,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A195S,-0.9063,312.232598,A,S,195,A195S
A356A,-1.124698,313.038266,A,A,356,A356A
A357A,-0.325209,312.244581,A,A,357,A357A
A408A,-1.260728,313.105931,A,A,408,A408A
C167Q,-0.532181,311.621618,C,Q,167,C167Q


In [11]:
for ( index, series ) in fits.iterrows():
    print 'NATRO\nSTART\n', series.position, series.native, 'PIKKA', series.designed, '\n'

NATRO
START
195 A PIKKA S 

NATRO
START
356 A PIKKA A 

NATRO
START
357 A PIKKA A 

NATRO
START
408 A PIKKA A 

NATRO
START
167 C PIKKA Q 

NATRO
START
403 D PIKKA A 

NATRO
START
154 E PIKKA D 

NATRO
START
164 E PIKKA A 

NATRO
START
177 E PIKKA A 

NATRO
START
17 E PIKKA S 

NATRO
START
180 E PIKKA K 

NATRO
START
180 E PIKKA L 

NATRO
START
406 E PIKKA D 

NATRO
START
426 E PIKKA S 

NATRO
START
75 F PIKKA H 

NATRO
START
101 H PIKKA R 

NATRO
START
122 H PIKKA E 

NATRO
START
122 H PIKKA N 

NATRO
START
315 H PIKKA N 

NATRO
START
373 H PIKKA R 

NATRO
START
379 H PIKKA R 

NATRO
START
379 H PIKKA T 

NATRO
START
247 I PIKKA N 

NATRO
START
303 I PIKKA N 

NATRO
START
91 I PIKKA E 

NATRO
START
344 K PIKKA A 

NATRO
START
174 L PIKKA R 

NATRO
START
261 M PIKKA D 

NATRO
START
261 M PIKKA T 

NATRO
START
323 M PIKKA K 

NATRO
START
166 N PIKKA D 

NATRO
START
223 N PIKKA G 

NATRO
START
223 N PIKKA H 

NATRO
START
293 N PIKKA A 

NATRO
START
407 N PIKKA C 

NATRO
START
22 Q PIKKA 