In [27]:
import pickle
import random
#import sys
#!{sys.executable} -m pip install openpyxl

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
pi = np.pi

In [28]:
class TltStatistics():

    '''Class for generating ties, loops, and tails from a distribution of polymers. Assumes three dimensional
    random walk (on a cubic lattice; only the distance from the surface level is tracked).

    All methods read the model parameters from the instance (``self.alpha``,
    ``self.la``, ...), never from module-level variables, so several instances
    with different parameters can coexist.

    Attributes
    ----------
    filepath: str
        local file path of the data set (xlsx-file) containing information of chains
    alpha: float
        degree of crystallinity
    la: float
        amorphous thickness in nanometer (nm)
    lk: float
        Kuhn-length in nm
    lc: int
        lamellar thickness, computed once as round(alpha*la/(1-alpha))
    n_monomers: float
        no. of monomers per Kuhn segment
    '''

    def __init__(self, filepath, alpha, la, lk, n_monomers):
        '''initializes the attributes'''
        self.filepath = filepath
        self.alpha = alpha
        self.la = la
        self.lk = lk
        self.lc = round(alpha*la/(1-alpha))
        self.n_monomers = n_monomers

    def __repr__(self):
        '''returns a string listing the attributes and the values assigned to this instance'''
        # The format string is kept as-is; only the value source changed
        # (instance attributes instead of same-named globals).
        return "TltStatistics(alpha = {}, Amorphous thickness = {} nm, Kuhn length = {} nm, No. of monomers per Kun segment = {} nm)".format(
            self.alpha, self.la, self.lk, self.n_monomers)

    def get_data(self):
        '''imports the data coming from experimentalists

        Reads the xlsx-file at ``self.filepath`` into ``self.data`` and prints it.
        '''
        self.data = pd.read_excel(self.filepath)
        print(self.data)

    def f1(self, s_left):
        '''left-side chain walk

        Grows a single sub-chain until it returns to the surface, reaches the
        opposite surface at distance ``self.la``, or uses up ``s_left`` segments.

        Parameters
        ----------
        s_left: int
            no. of segments still available for this sub-chain

        Returns
        -------
        list
            ``[kind, length]`` where kind is 'tie', 'loop' or 'tail' and length
            is the no. of segments consumed (never more than ``s_left``)
        '''

        step_list = [1,-1,0,0,0,0]# random walk on a cubic lattice
        x0 = 0 #surface level
        pos = 1 #the walk starts one step away from the surface
        length = 1
        sub_chain = 'n'

        if s_left==1:
            return ['tail', 1] #tails of length 1 are allowed

        while length<s_left:
            pos = pos+random.choice(step_list)
            length = length+1
            if pos-x0==0 or pos-x0==self.la or length==s_left:
                break

        # The checks below are deliberately independent `if`s: a later match
        # overrides an earlier one.
        if pos-x0==self.la:
            sub_chain = 'tie'
        if pos-x0==0:
            sub_chain = 'loop'
        if length==s_left: #if a loop or tie just exhausts the chain then it will be a tail
            sub_chain = 'tail'

        return [sub_chain,length]

    def f(self, s):
        '''creates a list of trajectories

        Repeatedly calls ``f1`` until all ``s`` segments are consumed. The
        list of ``[kind, length]`` entries is stored in ``self.left_all`` and
        also returned.
        '''

        s_left = s
        ans_list = []

        while s_left>0:
            ans = self.f1(s_left)
            ans_list.append(ans)
            s_left = s_left - ans[1]
        self.left_all = ans_list
        return ans_list

    def g0(self, N):
        '''creates full trajectories

        Parameters
        ----------
        N: int
            total no. of Kuhn segments in the chain

        Returns
        -------
        list
            ``[[n1, list1, list2], count]``: the amorphous length given to the
            first side, the sub-chain lists of both sides, and the resulting
            stem count. The same value is stored in ``self.sequence0``.
        '''

        Nc = self.alpha*N #no. of crystalline monomers
        lc = self.lc #lamellar thickness
        n_stems = round(Nc/lc) #no of stems - this rounding off might cause some approxs, but thats okay

        if Nc < lc: #forms full stems (although the stems are shorter) and tails only in this case,
            #biggest source of model ambiguity
            n1 = N-lc #amorphous length
            list1 = [['tail', n1]] # - stems are of full lengths, as long as N>lc
            list2 = [['tail', 0]]
            count = n_stems #stem count

        else:
            Nc = n_stems*lc
            Na = N - Nc
            # Split the amorphous segments at random between the two sides.
            n1 = random.randint(0,Na)
            n2 = Na-n1
            list1 = self.f(n1) if n1>0 else [['tail',0]]
            list2 = self.f(n2) if n2>0 else [['tail',0]]
            count = len(list1)+len(list2)-1 # no of stems = ntie+nloop-1

        lis = [[n1,list1,list2], count]
        self.sequence0 = lis
        return lis

    def g(self, N): #
        '''makes sure that the required no. of stems are formed

        Calls ``g0`` until its stem count equals round(alpha*N/lc). There is
        no cap on the number of attempts.

        Returns
        -------
        list of tuple
            ``(length, kind)`` pairs: first side reversed, followed by the
            second side.
        '''
        Nc = self.alpha*N
        n_stems = round(Nc/self.lc)

        while True:
            (_, list1, list2), count = self.g0(N)
            if count==n_stems:
                break

        ordered = list1[::-1]+list2
        return [(length, kind) for kind, length in ordered]


    def create_seq(self):
        '''final sequence to be outputted

        Loads the data, then builds one sequence (see ``g``) per chain. The
        first column of each row is read as the chain length and the second
        as the no. of chains of that length.
        '''

        self.get_data()
        global_seq_list = []
        for _, row in self.data.iterrows():
            # .iloc: explicit positional access (plain row[0] is deprecated
            # for label-indexed Series). int(): iterrows may upcast the count
            # to float, which range() rejects; fractional counts truncate.
            dp = int(round(row.iloc[0]/self.n_monomers)) #dp - no. of kuhn segments
            n = int(row.iloc[1]) #n - no. of chains
            if dp <= self.lc: #if total length <= lc - do not include those chains
                continue
            global_seq_list.extend(self.g(dp) for _ in range(n)) # seq for all chains of given dp

        return global_seq_list
   

In [29]:
# Model inputs handed to TltStatistics in the next cell.
filepath = 'Chain_length distributions.xlsx'  # xlsx-file with the chain data
alpha, la = 0.5, 8.0  # degree of crystallinity; amorphous thickness (nm)
lk, n_monomers = 1.0, 1.0  # Kuhn length (nm); monomers per Kuhn segment

In [30]:
# Build the generator from the parameters defined in the previous cell.
tlt = TltStatistics(
    filepath=filepath,
    alpha=alpha,
    la=la,
    lk=lk,
    n_monomers=n_monomers,
)

In [31]:
# Echo the instance; the notebook shows the string built by TltStatistics.__repr__.
tlt

TltStatistics(alpha = 0.5, Amorphous thickness = 8.0 nm, Kuhn length = 1.0 nm, No. of monomers per Kun segment = 1.0 nm)

In [32]:
#tlt.get_data()

In [33]:
#tlt.g(15)
#tlt.create_seq()

In [None]:
# Now create data: generate n independent sets of chain sequences and store
# each set in its own pickle file ("walk0", "walk1", ...).
# NOTE(review): no random seed is set in the visible cells, so every run
# produces different walks - confirm whether that is intended.

tlt = TltStatistics(filepath, alpha, la, lk, n_monomers)
n = 10 #no. of amorphous sections

for i in range(n):
    sequence = tlt.create_seq()
    with open("walk{}".format(i), "wb") as fp:
        pickle.dump(sequence, fp) # saves it as a pickle file
    # Report only after the with-block has closed (and flushed) the file.
    print("Data saved")
    