<a href="https://colab.research.google.com/github/shengyi2/spectrum_analysis/blob/main/Heterodimer_absorbance_curve_deconvolution_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Open absorbance spectra
  - FRET absorbance
  - absorbance spectrum of the FRET acceptor alone
  - absorbance spectrum of the FRET donor alone
  - please prepare them separately in CSV or Excel files with a column for wavelength and a column for the absorbance reading

 - A sample table is shown below. Headers are optional because the columns are read by position: wavelength first, absorbance second.

| wavelength | absorbance |
| ---- | --- |
| 350 | 360 |
| 355 | 690 |
| 360 | 900 |

In [None]:
import io
import os

import pandas as pd

In [None]:
# upload one spectrum file (FRET, acceptor or donor) in Colab and load it as a table
def import_Colab(variable):
  """Ask for a file upload in Colab and load it as a two-column spectrum table.

  Parameters
  ----------
  variable : str
      Label of the spectrum (e.g. 'FRET'). It is printed in the prompt and
      used as the name of the value column.

  Returns
  -------
  (str, pandas.DataFrame)
      Name of the uploaded file and a table with the columns 'wavelength'
      (int) and `variable` (numeric).

  Raises
  ------
  ValueError
      If nothing was uploaded, or the file is neither .csv nor an Excel
      (.xls*) file. Callers unpack two values, so the previous bare-string
      return for an unsupported file could not be used by them anyway.
  """
  print('Import',variable,'spectrum')

  uploaded = files.upload()
  # An empty upload used to surface later as an UnboundLocalError on `fn`.
  if not uploaded:
    raise ValueError('No file was uploaded, please rerun and select a file')

  for fn in uploaded.keys():
    print('User uploaded file "{name}" with length {length} bytes'.format(
        name=fn, length=len(uploaded[fn])))

  # As before, only the last uploaded file is loaded.
  # Read the uploaded bytes instead of reopening `fn` from disk: Colab may save
  # the upload under a different name when a file with that name already exists.
  buffer = io.BytesIO(uploaded[fn])
  columns = ['wavelength',variable]
  extension = os.path.splitext(fn)[1].lower()

  # read csv or excel; header=None keeps every row so both formats behave alike
  if extension == '.csv':
    data=pd.read_csv(buffer,names=columns,header=None)
  elif extension.startswith('.xls'):
    data=pd.read_excel(buffer,names=columns,header=None)
  else:
    raise ValueError('File type not accepted, please rerun and select right file')

  # Headers are optional: a first row whose wavelength cell is not a number is
  # taken to be a header row and dropped.
  first_cell = pd.to_numeric(data['wavelength'].iloc[:1],errors='coerce')
  if first_cell.isna().any():
    data = data.iloc[1:].reset_index(drop=True)

  data['wavelength'] = pd.to_numeric(data['wavelength']).astype(int)
  data[variable] = pd.to_numeric(data[variable])
  return fn,data

In [None]:
  from google.colab import files
  

# import spectra on Colab


> 



In [None]:
# import FRET spectrum from local drive
# FRET_path receives the uploaded file name, FRET_table the loaded spectrum table
FRET_path,FRET_table =import_Colab('FRET')

Import FRET spectrum


Saving Ca GSC.xlsx to Ca GSC (1).xlsx
User uploaded file "Ca GSC.xlsx" with length 18229 bytes


In [None]:
# rescale the FRET column so that its largest value becomes 1
FRET_table.FRET=FRET_table.FRET.div(FRET_table.FRET.max())

In [None]:
FRET_table.head()

Unnamed: 0,wavelength,FRET
0,200,0.569242
1,201,0.0
2,202,-0.429185
3,203,0.0
4,204,1.0


In [None]:
# drop the file extension, whatever it is (.xlsx, .xls, .csv), to get the sample
# name used for plot labels and output file names
sample_name = os.path.splitext(FRET_path)[0]

In [None]:
sample_name

'Ca GSC'

# import acceptor and donor FP spectra from local drive

In [None]:
# upload the acceptor-only spectrum, then rescale it so that its maximum is 1
acceptor_path,acceptor_table=import_Colab('Acceptor')
acceptor_table.Acceptor=acceptor_table.Acceptor.div(acceptor_table.Acceptor.max())
acceptor_table.head()

Import Acceptor spectrum


UnboundLocalError: ignored

In [None]:
# upload the donor-only spectrum, then rescale it so that its maximum is 1
donar_path,donar_table=import_Colab('Donar')
donar_table.Donar=donar_table.Donar.div(donar_table.Donar.max())
donar_table.head()

### import acceptor and donor FP spectra from FPbase



#### import spectra from FPbase

In [None]:
import requests
# Download the spectra of the proteins listed on FPbase as JSON.
# The former `q=requests+language:python` parameter was a leftover from a
# GitHub-search example and has no meaning for this endpoint.
url='https://www.fpbase.org/api/proteins/spectra/'
response = requests.get(
    url,
    params={'format': 'json'},
    timeout=60,  # do not hang the notebook forever if the server is unreachable
)
# fail here on an HTTP error instead of later while decoding the body
response.raise_for_status()


In [None]:
response

In [None]:
# decode the JSON body; the cells below iterate it as a list of protein entries
# and read each entry's 'name' and 'spectra' keys
spectra_list=response.json()

#### find the donor spectrum based on input name

In [None]:
# ask for the donor protein name; it is compared with the FPbase entry names in the
# next cell and reused later as a plot label
donar_name=input('name of the donar FP on FPbase is: ')

In [None]:
# Find the FPbase entry for the donor.
# Every entry whose name contains the typed text (ignoring case) is printed.
# An exact name match is selected as soon as it is seen; otherwise the last
# partial match is kept, as before.
input_dict={}
donar_query=donar_name.strip().lower()
for FP in spectra_list:
  candidate=FP['name'].lower()
  if donar_query in candidate:
    print(FP['name'])
    input_dict=FP
    if candidate==donar_query:
      break

# An empty result used to surface only in the next cell as KeyError: 'spectra'.
if not input_dict:
  raise ValueError('No FPbase protein name contains "%s"' % donar_name)



In [None]:
# Pick the spectrum whose state label contains 'em' from the selected entry
# (the last one wins when several match).
# NOTE(review): 'em' presumably marks emission states - confirm this is the
# spectrum wanted for an absorbance deconvolution.
donar_spectrum=None
for spectrum in input_dict['spectra']:
  if 'em' in spectrum['state']:
    donar_spectrum=spectrum['data']

# Without this check a missing spectrum left the variable undefined, or silently
# reused the value from an earlier run of this cell.
if donar_spectrum is None:
  raise ValueError('No "em" spectrum found for %s' % input_dict['name'])

In [None]:
# Build the donor table. Only the wavelength column is cast to int: passing
# dtype=int to the constructor applies to every column, including the
# fractional spectrum values.
donar_table=pd.DataFrame(donar_spectrum,columns=['wavelength','Donar']).astype({'wavelength':int})

In [None]:
donar_table

#### find the acceptor spectrum based on input name

In [None]:
# ask for the acceptor protein name; it is compared with the FPbase entry names in
# the next cell and reused later as a plot label
acceptor_name=input('name of the acceptor FP on FPbase is: ')

In [None]:
# Find the FPbase entry for the acceptor.
# Every entry whose name contains the typed text (ignoring case) is printed.
# An exact name match is selected as soon as it is seen; otherwise the last
# partial match is kept, as before.
input_dict={}
acceptor_query=acceptor_name.strip().lower()
for FP in spectra_list:
  candidate=FP['name'].lower()
  if acceptor_query in candidate:
    print(FP['name'])
    input_dict=FP
    if candidate==acceptor_query:
      break

# An empty result used to surface only in the next cell as KeyError: 'spectra'.
if not input_dict:
  raise ValueError('No FPbase protein name contains "%s"' % acceptor_name)



In [None]:
# Pick the spectrum whose state label contains 'em' from the selected entry
# (the last one wins when several match).
# NOTE(review): 'em' presumably marks emission states - confirm this is the
# spectrum wanted for an absorbance deconvolution.
acceptor_spectrum=None
for spectrum in input_dict['spectra']:
  if 'em' in spectrum['state']:
    acceptor_spectrum=spectrum['data']

# Without this check a missing spectrum left the variable undefined, or silently
# reused the value from an earlier run of this cell.
if acceptor_spectrum is None:
  raise ValueError('No "em" spectrum found for %s' % input_dict['name'])

In [None]:
# Build the acceptor table. Only the wavelength column is cast to int: passing
# dtype=int to the constructor applies to every column, including the
# fractional spectrum values.
acceptor_table=pd.DataFrame(acceptor_spectrum,columns=['wavelength','Acceptor']).astype({'wavelength':int})

In [None]:
acceptor_table

# Merge all three spectra into one table

In [None]:
# keep only the wavelengths that occur in both the acceptor and the donor table
concat_table=acceptor_table.merge(donar_table,how="inner",on='wavelength')
concat_table

Unnamed: 0,wavelength,Acceptor,Donar
0,300,0.037127,0.053670
1,301,0.028862,0.045955
2,302,0.021951,0.039343
3,303,0.016125,0.033943
4,304,0.011247,0.027661
...,...,...,...
496,796,-0.021274,0.000441
497,797,-0.021409,0.000441
498,798,-0.021545,0.000441
499,799,-0.021680,0.000331


In [None]:
# Build the three-spectrum table directly from the source tables.
# Merging FRET_table into the existing concat_table made this cell
# non-repeatable: a second run added FRET_x/FRET_y columns and broke the fit.
concat_table=(
    acceptor_table
    .merge(donar_table,how="inner",on='wavelength')
    .merge(FRET_table,how="inner",on='wavelength')
)
concat_table

Unnamed: 0,wavelength,Acceptor,Donar,FRET
0,300,0.037127,0.053670,0.065951
1,301,0.028862,0.045955,0.060658
2,302,0.021951,0.039343,0.055508
3,303,0.016125,0.033943,0.051073
4,304,0.011247,0.027661,0.047353
...,...,...,...,...
496,796,-0.021274,0.000441,-0.001288
497,797,-0.021409,0.000441,-0.001288
498,798,-0.021545,0.000441,-0.001431
499,799,-0.021680,0.000331,-0.001431


# Let's start fitting
  - let's consider the relationship of FRET, acceptor, and donor at any wavelength as:


> > FRET(acceptor,donor)=a\*acceptor+b\*donor

where a and b are fitting variables 



In [None]:
concat_table

Unnamed: 0,wavelength,Acceptor,Donar,FRET
0,300,0.037127,0.053670,0.065951
1,301,0.028862,0.045955,0.060658
2,302,0.021951,0.039343,0.055508
3,303,0.016125,0.033943,0.051073
4,304,0.011247,0.027661,0.047353
...,...,...,...,...
496,796,-0.021274,0.000441,-0.001288
497,797,-0.021409,0.000441,-0.001288
498,798,-0.021545,0.000441,-0.001431
499,799,-0.021680,0.000331,-0.001431


In [None]:
# fit the measured spectrum as a weighted sum of the acceptor and donor spectra
from numpy import arange
from pandas import read_csv
from scipy.optimize import curve_fit
from matplotlib import pyplot
 
# define the true objective function
# def objective(x, a, b, c):
# 	return a * x + b * x**2 + c
def objective(X,a,b):
  """Model used for the fit: a weighted sum of two reference spectra.

  X is unpacked into two items (acceptor values first, donor values second);
  the result is a times the first plus b times the second.
  """
  acceptor_values,donar_values=X
  weighted_acceptor=a*acceptor_values
  weighted_donar=b*donar_values
  return weighted_acceptor+weighted_donar
 
# # load the dataset
# url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/longley.csv'
# dataframe = read_csv(url, header=None)
# data = dataframe.values
# choose the input and output variables
# x, y = data[:, 4], data[:, -1]
# take the three aligned columns out of the merged table (multiplying by 1 gives new Series)
acceptor=concat_table.Acceptor*1
donar=concat_table.Donar*1
FRET=concat_table.FRET*1

# fit the two weights; the covariance estimate returned by curve_fit is discarded
popt, _ = curve_fit(
    objective,
    (acceptor, donar),
    FRET,
    p0=[0.50558548, 0.50558548],
    method='dogbox',
    jac='cs',
)
print(popt)

# report the fitted weights
a,b=popt
print('FRET_overlay_spectrum = %.5f * Acceptor + %.5f * Donar' % (a, b))

# plot input vs output
# pyplot.scatter(x, y)
# # define a sequence of inputs between the smallest and largest known inputs
# x_line = arange(min(x), max(x), 1)
# # calculate the output for the range
# y_line = objective(x_line, a, b, c)
# # create a line plot for the mapping function
# pyplot.plot(x_line, y_line, '--', color='red')
# pyplot.show()

[0.33314086 0.30196892]
FRET_overlay_spectrum = 0.33314 * Acceptor + 0.30197 * Donar


In [None]:
from sklearn.metrics import r2_score
# r2_score takes (y_true, y_pred): the measured curve goes first and the fitted
# overlay second. R^2 is not symmetric, so the previous swapped order reported
# a different number.
r2=r2_score(FRET,objective((acceptor,donar),a,b))
print('R^2_score: ', r2)

R^2_score:  0.9801482915837373


In [None]:
# collect the curves for plotting and export: each reference spectrum scaled by its
# fitted weight, their sum from the model, and the measured spectrum
data_graph={
    'wavelength':concat_table['wavelength'],
    'Acceptor':concat_table['Acceptor']*a,
    'Donar':concat_table['Donar']*b,
    'Overlay':objective((concat_table['Acceptor'], concat_table['Donar']),a,b),
    'Measured_curve':concat_table['FRET'],
}
graph_table=pd.DataFrame(data_graph)

In [None]:
graph_table

Unnamed: 0,wavelength,Acceptor,Donar,Overlay,Measured_curve
0,300,0.012369,0.016207,0.028575,0.065951
1,301,0.009615,0.013877,0.023492,0.060658
2,302,0.007313,0.011880,0.019193,0.055508
3,303,0.005372,0.010250,0.015622,0.051073
4,304,0.003747,0.008353,0.012100,0.047353
...,...,...,...,...,...
496,796,-0.007087,0.000133,-0.006954,-0.001288
497,797,-0.007132,0.000133,-0.006999,-0.001288
498,798,-0.007177,0.000133,-0.007044,-0.001431
499,799,-0.007223,0.000100,-0.007123,-0.001431


In [None]:
# export the fitted curves to a CSV file named after the sample
graph_table.to_csv(sample_name+"_processed.csv")

In [None]:
import plotly.graph_objects as go

# overlay the three merged input spectra, one line per column
fig = go.Figure(data=[
    go.Scatter(x=concat_table.wavelength, y=concat_table.Donar,
               mode='lines', name='Donar'),
    go.Scatter(x=concat_table.wavelength, y=concat_table.Acceptor,
               mode='lines', name='Acceptor'),
    go.Scatter(x=concat_table.wavelength, y=concat_table.FRET,
               mode='lines', name='FRET'),
])

fig.show()

In [None]:
# Plot the weighted donor and acceptor contributions, their sum (Overlay) and the
# measured curve.
# NOTE(review): donar_name / acceptor_name are only set by the FPbase cells;
# confirm they exist when the spectra were uploaded from the local drive instead.
fig = go.Figure()

# Add traces
fig.add_trace(go.Scatter(x=graph_table.wavelength, y=graph_table.Donar,
                         mode='lines',
                         name=donar_name))
fig.add_trace(go.Scatter(x=graph_table.wavelength, y=graph_table.Acceptor,
                         mode='lines',
                         name=acceptor_name))
fig.add_trace(go.Scatter(x=graph_table.wavelength, y=graph_table.Overlay,
                         mode='lines',
                         name='Overlay'))
fig.add_trace(go.Scatter(x=graph_table.wavelength, y=graph_table.Measured_curve,
                         mode='lines',
                         name=sample_name))

fig.show()
# Save into the working directory, named after the sample like the processed CSV.
# The previous absolute path '/deconv.html' wrote to the filesystem root and was
# overwritten by every sample.
fig.write_html(sample_name+'_deconv.html')

In [None]:
# report each fitted weight as a percentage of the sum a+b
print('The measured curve is', a/(a+b)*100,'% from the acceptor',acceptor_name,'and',b/(a+b)*100,'% from the donar',donar_name)

The measured curve is 52.45405894169504 % from the acceptor gScarlet and 47.54594105830497 % from the donar mScarlet-I


In [None]:
# one-line summary: sample name, each weight as a percentage of a+b, and the R^2 computed above
print('The measured curve of',sample_name,'is', a/(a+b)*100,'% from',acceptor_name,'and',b/(a+b)*100,'% from ',donar_name,'. r2=',r2)

The measured curve of Ca GSC is 52.45405894169504 % from gScarlet and 47.54594105830497 % from  mScarlet-I . r2= 0.9801482915837373
