In [6]:
##### -*-coding:utf-8 -*-
# import all the libraries 
# python==3.8; jupyterlab==3.0.12; lumicks.pylake==0.8.1; matplotlib==3.3.4; more-itertools==8.7.0;
# npTDMS==1.1.0; numpy==1.20.1; opencv-python==4.5.1.48; pandas==1.2.3; scipy==1.6.1; tifffile==2021.3.5
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from __future__ import division
import ruptures as rpt
import pwlf
from scipy import interpolate
from __future__ import division
from more_itertools import chunked
%matplotlib widget

In [7]:
# Load the time/intensity table from the Excel export and preview its first rows.
# NOTE(review): the path is absolute and machine-specific; adjust it before running elsewhere.
intens = pd.read_excel(
    io=r'C:\Users\KTS260\Google Drive\ExampleData_DNApTrace_20210720\image-TimeIntensity.xlsx'
)
print(intens.head(5))

   Unnamed: 0    time/s  intensity_binarized
0           0  4.260492                    1
1           1  4.424357                    1
2           2  4.588222                    1
3           3  4.752088                    1
4           4  4.915953                    1


In [8]:
# Build the time axis and a simulated noisy trace from the binarized intensity, then plot it.
time_intens = np.array(intens['time/s'])

# Use a seeded generator so the simulated noise (and every result derived from it)
# is identical on each "Restart & Run All".
rng = np.random.default_rng(0)
# Gaussian noise with mean 0 and standard deviation 0.05, one sample per data point.
noise = rng.normal(0, 0.05, len(intens))
intensity = np.array(intens['intensity_binarized'] + noise)

# plot the results
plt.figure()
plt.plot(time_intens, intensity)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
# Option 1: ruptures + pwlf. ruptures (PELT) gives a rough estimate of the change points,
# which pwlf then refines by optimizing from that estimate as a breakpoint guess.
# NOTE: this notebook defines PLRupture three times (options 1-3); the cell run last is the one in effect.
def PLRupture(basepairs, time_from, time_to, sigma, time_axis=None):
    """Detect change points inside a time window and derive per-segment statistics.

    Parameters
    ----------
    basepairs : array_like
        1-D signal, one value per entry of the time axis.
    time_from, time_to : float
        Inclusive bounds of the region of interest (ROI) on the time axis.
    sigma : float
        Noise standard deviation used in the PELT penalty
        ``log(n) * dim * sigma**2``; a bigger sigma gives fewer break points.
    time_axis : array_like, optional
        Time stamps matching ``basepairs``. Defaults to the notebook-level
        ``time_intens`` array so that existing four-argument calls keep working.

    Returns
    -------
    tuple
        ``(time_from, time_to, time_bkp, bps_bkp, duration_time,
        processivity_event, speed)`` where ``time_bkp`` holds the refined
        break times (window ends included), ``bps_bkp`` the signal linearly
        interpolated at those times, and the last three entries are the
        differences between consecutive break points and their ratio.

    Raises
    ------
    ValueError
        If fewer than two samples fall inside the requested window.
    """
    if time_axis is None:
        time_axis = time_intens
    time_axis = np.asarray(time_axis)
    basepairs = np.asarray(basepairs)

    # Restrict both arrays to the region of interest.
    in_window = (time_axis >= time_from) & (time_axis <= time_to)
    time_ROI = time_axis[in_window]
    basepairs_ROI = basepairs[in_window]
    if time_ROI.size < 2:
        raise ValueError(
            "PLRupture needs at least two samples between time_from and time_to"
        )

    # Rough change-point detection with PELT and a least-squares ("l2") cost.
    # jump=1 evaluates every sample as a candidate (most precise, slowest).
    algo = rpt.Pelt(model="l2", jump=1).fit(basepairs_ROI)

    # The number of change points is unknown, so a penalty is used instead of
    # n_bkps: pen = log(n) * dim * sigma**2 with dim = 1 for a 1-D signal.
    n = len(basepairs_ROI)
    dim = 1
    my_bkps = algo.predict(pen=np.log(n) * dim * sigma ** 2)

    # my_bkps holds sample indexes; the last entry is dropped (per the ruptures
    # documentation it is the signal length, not a change point).
    interior = np.asarray(my_bkps[:-1], dtype=int)
    guess = time_ROI[interior]

    if guess.size:
        # pwlf.fit_guess expects guesses for the interior breakpoints only and
        # returns the fitted breaks with both ends of the data added, so the
        # window ends must not be part of the guess.
        my_pwlf = pwlf.PiecewiseLinFit(time_ROI, basepairs_ROI)
        time_bkp = np.asarray(my_pwlf.fit_guess(guess))
    else:
        # No change point detected: the whole window is a single segment.
        time_bkp = np.array([time_ROI[0], time_ROI[-1]])

    # Read the signal at the refined break times by linear interpolation of the
    # raw data; extrapolate in case the optimizer moved a break past the window.
    func_2 = interpolate.interp1d(time_ROI, basepairs_ROI, kind='slinear', fill_value="extrapolate")
    bps_bkp = func_2(time_bkp)

    duration_time = np.diff(time_bkp)
    processivity_event = np.diff(bps_bkp)
    speed = processivity_event / duration_time
    return time_from,time_to,time_bkp,bps_bkp,duration_time,processivity_event,speed


In [16]:
# Option 2: ruptures only. The change points found by ruptures (PELT) are used as they are,
# without any piecewise-linear refinement.
# NOTE: this notebook defines PLRupture three times (options 1-3); the cell run last is the one in effect.
def PLRupture(basepairs, time_from, time_to, sigma, time_axis=None):
    """Detect change points inside a time window and derive per-segment statistics.

    Parameters
    ----------
    basepairs : array_like
        1-D signal, one value per entry of the time axis.
    time_from, time_to : float
        Inclusive bounds of the region of interest (ROI) on the time axis.
    sigma : float
        Noise standard deviation used in the PELT penalty
        ``log(n) * dim * sigma**2``; a bigger sigma gives fewer break points.
    time_axis : array_like, optional
        Time stamps matching ``basepairs``. Defaults to the notebook-level
        ``time_intens`` array so that existing four-argument calls keep working.

    Returns
    -------
    tuple
        ``(time_from, time_to, time_bkp, bps_bkp, duration_time,
        processivity_event, speed)`` where ``time_bkp`` / ``bps_bkp`` are the
        time and signal values at the first ROI sample, at every detected
        change point and at the last ROI sample, and the last three entries
        are the differences between consecutive break points and their ratio.

    Raises
    ------
    ValueError
        If fewer than two samples fall inside the requested window.
    """
    if time_axis is None:
        time_axis = time_intens
    time_axis = np.asarray(time_axis)
    basepairs = np.asarray(basepairs)

    # Restrict both arrays to the region of interest.
    in_window = (time_axis >= time_from) & (time_axis <= time_to)
    time_ROI = time_axis[in_window]
    basepairs_ROI = basepairs[in_window]
    if time_ROI.size < 2:
        raise ValueError(
            "PLRupture needs at least two samples between time_from and time_to"
        )

    # Change-point detection with PELT and a least-squares ("l2") cost.
    # jump=1 evaluates every sample as a candidate (most precise, slowest).
    algo = rpt.Pelt(model="l2", jump=1).fit(basepairs_ROI)

    # The number of change points is unknown, so a penalty is used instead of
    # n_bkps: pen = log(n) * dim * sigma**2 with dim = 1 for a 1-D signal.
    n = len(basepairs_ROI)
    dim = 1
    my_bkps = algo.predict(pen=np.log(n) * dim * sigma ** 2)

    # my_bkps holds sample indexes; the last entry is dropped (per the ruptures
    # documentation it is the signal length, not a change point).
    interior = np.asarray(my_bkps[:-1], dtype=int)

    # Map the indexes back to time/signal values and add both ends of the ROI.
    time_bkp = np.concatenate(([time_ROI[0]], time_ROI[interior], [time_ROI[-1]]))
    bps_bkp = np.concatenate(([basepairs_ROI[0]], basepairs_ROI[interior], [basepairs_ROI[-1]]))

    duration_time = np.diff(time_bkp)
    processivity_event = np.diff(bps_bkp)
    speed = processivity_event / duration_time
    return time_from,time_to,time_bkp,bps_bkp,duration_time,processivity_event,speed


In [20]:
# Option 3: ruptures + pwlf. ruptures (PELT) fixes the change-point times; pwlf then fits a
# continuous piecewise-linear model with breaks at exactly those times.
# NOTE: this notebook defines PLRupture three times (options 1-3); the cell run last is the one in effect.
def PLRupture(basepairs, time_from, time_to, sigma, time_axis=None):
    """Detect change points inside a time window and derive per-segment statistics.

    Parameters
    ----------
    basepairs : array_like
        1-D signal, one value per entry of the time axis.
    time_from, time_to : float
        Inclusive bounds of the region of interest (ROI) on the time axis.
    sigma : float
        Noise standard deviation used in the PELT penalty
        ``log(n) * dim * sigma**2``; a bigger sigma gives fewer break points.
    time_axis : array_like, optional
        Time stamps matching ``basepairs``. Defaults to the notebook-level
        ``time_intens`` array so that existing four-argument calls keep working.

    Returns
    -------
    tuple
        ``(time_from, time_to, time_bkp, bps_bkp, duration_time,
        processivity_event, speed)`` where ``time_bkp`` holds the times of the
        first ROI sample, every detected change point and the last ROI sample,
        ``bps_bkp`` the piecewise-linear fit evaluated at those times, and the
        last three entries are the differences between consecutive break
        points and their ratio.

    Raises
    ------
    ValueError
        If fewer than two samples fall inside the requested window.
    """
    if time_axis is None:
        time_axis = time_intens
    time_axis = np.asarray(time_axis)
    basepairs = np.asarray(basepairs)

    # Restrict both arrays to the region of interest.
    in_window = (time_axis >= time_from) & (time_axis <= time_to)
    time_ROI = time_axis[in_window]
    basepairs_ROI = basepairs[in_window]
    if time_ROI.size < 2:
        raise ValueError(
            "PLRupture needs at least two samples between time_from and time_to"
        )

    # Change-point detection with PELT and a least-squares ("l2") cost.
    # jump=1 evaluates every sample as a candidate (most precise, slowest).
    algo = rpt.Pelt(model="l2", jump=1).fit(basepairs_ROI)

    # The number of change points is unknown, so a penalty is used instead of
    # n_bkps: pen = log(n) * dim * sigma**2 with dim = 1 for a 1-D signal.
    n = len(basepairs_ROI)
    dim = 1
    my_bkps = algo.predict(pen=np.log(n) * dim * sigma ** 2)

    # my_bkps holds sample indexes; the last entry is dropped (per the ruptures
    # documentation it is the signal length, not a change point).
    interior = np.asarray(my_bkps[:-1], dtype=int)

    # Break times: both ends of the ROI plus every detected change point.
    time_bkp = np.concatenate(([time_ROI[0]], time_ROI[interior], [time_ROI[-1]]))

    # Fit line segments that end at the given break times, then read the fitted
    # value at each break (this smooths out the noise of the single raw samples).
    my_pwlf = pwlf.PiecewiseLinFit(time_ROI, basepairs_ROI)
    my_pwlf.fit_with_breaks(time_bkp)
    bps_bkp = my_pwlf.predict(time_bkp)

    duration_time = np.diff(time_bkp)
    processivity_event = np.diff(bps_bkp)
    speed = processivity_event / duration_time
    return time_from,time_to,time_bkp,bps_bkp,duration_time,processivity_event,speed


In [21]:
# Test run: apply PLRupture to the noisy trace between 10 and 140 on the time axis
# with sigma = 0.05, then print the per-segment results followed by the window bounds.
(time_from, time_to, time_bkp, bps_bkp,
 duration_time, processivity_event, speed) = PLRupture(intensity, 10, 140, 0.05)
print(duration_time, processivity_event, speed, time_from, time_to, sep="\n")

[ 7.70165907  1.47478578  5.40754786  5.40754786  0.32773017  1.14705561
 10.48736554  1.14705561  3.60503191  0.32773017  1.96638104  0.32773017
  0.32773017  0.49159526  0.32773017  8.02938924 32.93688241  1.14705561
 19.17221513  1.31092069  1.80251595  0.49159526  0.32773017 10.81509572
  2.4579763   1.63865087  3.44116682  0.32773017  0.81932543  1.80251595
  2.78570647]
[-4.57772028e-01  1.65180731e-04  4.32957797e-01 -2.05469188e-01
 -1.19942914e+00  1.28978775e+00 -1.41629382e-01 -1.96674280e-01
  4.76832116e-01  3.75585594e-03 -4.86066277e-01 -6.99612798e-02
 -9.02675258e-01  1.28628863e+00 -1.38091932e+00  1.24660417e+00
  1.95499981e-01 -7.44064579e-02 -3.43709654e-02 -4.04598928e-01
  2.83126951e-01 -6.23806808e-01  9.44405292e-01 -3.95846337e-01
 -5.28909552e-01  9.76195913e-01 -3.09130089e-01  2.91341337e-01
 -1.16607544e+00  6.17764300e-01  5.48279232e-01]
[-5.94381060e-02  1.12003203e-04  8.00654581e-02 -3.79967396e-02
 -3.65980687e+00  1.12443350e+00 -1.35047626e-02 -1

In [22]:
# Test plot: overlay the noisy trace, the original binarized trace and the detected
# change points in a fixed viewing window.
plt.figure()
plt.gca().set(xlim=(-5, 170), ylim=(-0.3, 1.5))
plt.plot(time_intens, intensity, label="noisy data")
plt.plot(time_intens, intens['intensity_binarized'], label='raw data')
plt.scatter(time_bkp, bps_bkp, c='r', marker="^", label="change point")
plt.title("simulated data and rupture detection")
plt.legend()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
# Export the results of the last PLRupture call to an Excel workbook with three sheets.
# NOTE(review): `filename` and `sheet_name` are not defined in the cells shown here;
# make sure they are set before running this cell.
excel_filename = filename + '-' + sheet_name + str(time_from) + '-' +str(time_to) + '.xlsx'

# PLRupture does not return the totals over the whole window, so derive them here
# from the first and last break point it returned.
bound_duration = time_bkp[-1] - time_bkp[0]
bound_processivity = bps_bkp[-1] - bps_bkp[0]

data_1 = {'breaking time/s':time_bkp,
        'breaking position/bp':bps_bkp}
data_2 = {'event duration/s':duration_time,
        'event processivity/bp':processivity_event,
        'velocity/(bp/s)':speed}
# Scalars are wrapped in lists: pandas refuses to build a DataFrame from a dict
# that contains only scalar values.
data_3 = {'bound duration/s':[bound_duration],
        'bound processivity/bp':[bound_processivity]}
df_1 = pd.DataFrame(data_1)
df_2 = pd.DataFrame(data_2)
df_3 = pd.DataFrame(data_3)

# The context manager saves and closes the workbook even if a sheet fails to write
# (ExcelWriter.save() is deprecated in recent pandas releases).
with pd.ExcelWriter(excel_filename) as writer:
    df_1.to_excel(writer,sheet_name ='change_points', index=False)
    df_2.to_excel(writer,sheet_name ='analyzed_data', index=False)
    df_3.to_excel(writer,sheet_name ='processivity_one_burst', index=False)

In [12]:
# Mean of the per-segment speeds and its standard error.
# ddof=0 divides by N when estimating the standard deviation (scipy's default is ddof=1).
from scipy import stats

avg = np.mean(speed)
sem = stats.sem(speed, axis=None, ddof=0)

print(avg, sem, sep="\n")

0.4115294533319393
0.5330995601224714


In [13]:
# Histogram of the per-segment speeds.
fig, ax = plt.subplots(figsize=(6, 4))
n_bins = 30
n, bins, patches = ax.hist(speed, n_bins)
ax.set_title('Pol Rate')
ax.set_xlabel('Pol Rate(bp/s)')
ax.set_ylabel('Occurrence')
# Adjust the layout before showing: with non-interactive backends the figure is
# already rendered (and closed) once plt.show() returns.
fig.tight_layout()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [15]:
# Final figure: the noisy trace with the detected change points marked as red triangles.
plt.figure(figsize=(10, 6))
plt.plot(time_intens, intensity)
plt.scatter(time_bkp, bps_bkp, s=50, marker="^", color='red')
plt.gca().set(xlabel='Time/s', ylabel='Basepairs', title='Example Rupture Detection')
plt.tight_layout()

# To keep a copy on disk, call plt.savefig(<path>.png) here, before plt.show().
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …