In [1]:
import re
import random
import numpy as np 
import pickle

### Tokenization

In [2]:
def remove_space_from_quatation(string):
    temp = string.split()
    
    final_str = ''
    
    for i in temp:
        
        if i=='’':
            t = final_str[:len(final_str)-1] + "'"
            final_str = t
        else:
            final_str += i
            final_str += ' '
            
    return final_str
            
def get_tokens(statement):
    fs = remove_space_from_quatation(statement)
    count = 0
    ans = re.findall(r'[\'\w]+', fs)
    
    final_tokens = []
    
    for i in ans:
        final_tokens.append( i.lower())
        count += 1
        
    return final_tokens ,count 


def tokenize(file):
    count = 0
    f = open(file)
    lines = f.readlines()
    
    tokens = []
    
    for i in lines:
        temp ,c = get_tokens(i)
        
        count += c
        tokens.append(temp)
        
    return tokens ,count

### Adding starting and ending tokens in each statements 

In [3]:

def add_padding(tokens):
    start_token = '<s>'
    end_token = '<e>'

    final_tokens = []

    for i in tokens:
        temp = [start_token] + i + [end_token]
        final_tokens.append(temp)

    return final_tokens

### Preprocessing

In [4]:
def preproceesing( filename ):
    tokens , total_tokens = tokenize(filename)
    
    padded_tokens = add_padding(tokens)
    
    return padded_tokens ,total_tokens

In [5]:
tokens, total_tokens = preproceesing('../text_data/technical_domain_corpus.txt')

### Calculating grams 

In [6]:
def unigram(tokens):
    grams = []
    
    for stmt in tokens:
        for i in stmt:
            temp = tuple([i])
            grams.append(temp)
    
    return grams

def bigram(tokens):
    
    gram = []
    
    for line in tokens:
        for i in range( len(line) - 1):
            temp = tuple( [line[i] ,line[i+1]])
            gram.append( temp)
            
    return gram 


def trigram(tokens):
    
    gram = []
    
    for line in tokens:
        for i in range( len(line) - 2):
            temp = tuple( [line[i] ,line[i+1],line[i+2]])
            gram.append(temp)
            
    return gram


def fourgram(tokens):
    gram = []
    
    for line in tokens:
        for i in range( len(line) - 3):
            temp = tuple( [line[i] ,line[i+1] , line[i+2] ,line[i+3] ])
            gram.append(temp)
    
    return gram


n_grams = [ None ,unigram(tokens) , bigram(tokens) , trigram(tokens) , fourgram(tokens) ]


In [7]:
def calc_freq( n_grams ):
    freq = [ None , {} ,{} ,{} ,{} ]
    
    for n in range(1,5):
        gram = n_grams[n]
        
        for i in gram:
            if i not in freq[n]:
                freq[n][i] = 1
            else:
                freq[n][i] += 1
                
    return freq

freq = calc_freq(n_grams)

### Kneser-Ney Smoothing 

In [46]:
def kn_recur( word , prev_words , top_order , freq , total_tokens):
    
    ## Base Condition
    if len(prev_words) == 0:
        if top_order == True:
            count = 0
            
            if word in freq[1]:
                count = freq[1][word]
        
            return ( count / total_tokens )
        else:
            num = 0

            for i in freq[2]:
                if i[1] == word:
                    num+=1

            return num / len( freq[2] )

    d = 0.7
    words = prev_words + [word]
    
    ## Compute NUmerator
    look_all = len(words)
    look_prev = len(prev_words)
    key = tuple(words)
    num1 = 0
    
    if top_order == True:
        ## Count For highest Order
        if key in freq[look_all]:
            num1 = max( freq[look_all][key] - d , 0 )
    
    else:
        temp = words[1:]
        
        for i in freq[look_all]:
            t1 = list(i)
            t2 = t1[ 1:len(t1)]
            t3 = tuple(t2)
            
            if t3 == temp:
                num1 += 1
    
    
    ## Computing Denominator
    key = tuple(prev_words)
    deno = 0
    
    if top_order == True:
        if key in freq[look_prev]:
            deno = freq[look_prev][key]
        else:
            deno = 1
    else:
        deno = len(freq[look_all])
    
    
    num2 = 0
    for i in freq[look_all]:
        t1 = list(i)
        t2 = t1[0:len(t1)-1]
        t3 =  tuple(t2)
        
        if t3 == tuple(prev_words):
            num2 += 1
    
    if num1==0 and num2==0:
        #ans = random.uniform(0.1, 0.01) +  random.uniform(0,1) * kn_recur(word ,prev_words[1:] ,False ,freq ,total_tokens)
        ans = random.uniform(0.1, 0.2) + kn_recur(word ,prev_words[1:] ,False ,freq ,total_tokens)

        return ans
    
    if num2 == 0:
        return ( num1 / deno )
    
    return (num1/deno) + d*(num2/deno)*kn_recur(word , prev_words[1:] ,False ,freq ,total_tokens)

### Witten Bell smoothing

In [43]:
def val( gram ,freq):
    ans = 0
    if len(gram) == 0 :
        for i in freq[1].values():
            ans += i
    elif gram in freq[ len(gram)].keys():
        ans = freq[len(gram)][gram]
    else:
        ans = 0
    
    return ans 

def lambdas( gram ,freq):
    
    count = 0
    
    for i in freq[ len(gram)+1 ].keys():
        if i[:-1] == gram:
            count += 1
            
    deno = val(gram ,freq)
        
    deno = deno + count
    
    if deno != 0:
        lam = count/float(deno)
        return lam
    
    return random.uniform(0.1,0.2)


def wb( words , freq):
    
    if len(words) == 1 :
        gram = tuple([words])
        num = val(gram ,freq)
        
        deno = 0
        for i in freq[1].values():
            deno += i
        
        return num/float(deno)
    
    gram = tuple(words)
    
    num = val( gram ,freq)
    deno = val( gram[:-1] ,freq)
    
    if deno != 0 :
        
        ans1 = ( 1-lambdas(gram[:-1] , freq) )
        ans2 =  lambdas(gram[:-1] ,freq ) * wb(gram[1:] ,freq)
        
        ans = ans1*(num/deno) + ans2
        
        return ans 
    
    else:
        ans = random.uniform(0.1, 0.2) +  +(lambdas(gram[:-1] ,freq) * wb(gram[1:] ,freq))
        return ans
    

### Perplexity 

In [77]:
def find_perp( stmt ,freq ,total_tokens ,smoothing):
    
    gram4 = fourgram([stmt])

    probability = 1
    prev_prob = 1
    
    for i in gram4:
        
        cur_word = i[3]
        prev_words = list(i[0:3])
        
        if smoothing == 'kn':
            temp = kn_recur(cur_word, prev_words ,True ,freq ,total_tokens)
        else:
            temp = wb(i ,freq)
        
        if temp == 0 :
            continue
        
        prev_prob = probability
        probability *= temp
        
        if probability == 0 :
            probability = prev_prob
            break
    
    deno = 1/probability
    perplexity = np.power( deno ,1/len(stmt))
    
    return perplexity

### Testing 

In [26]:
f = open('../text_data/technical_domain_corpus.txt')
lines = f.readlines()

In [32]:
idx = random.sample(range(0, len(tokens)), 1000)

test = []
train = [] 
stmts = []

total_tokens = 0
for i in range(0, len(tokens)):
    
    if i in idx:
        test.append( tokens[i] )
        stmts.append( lines[i] )
    else:
        train.append(tokens[i] )
        total_tokens += len(tokens[i])

In [33]:
n_grams = [ None ,unigram(train) , bigram(train) , trigram(train) , fourgram(train) ]

train_freq = calc_freq(n_grams)

In [82]:
tech_corpus_kney = [] 

for i in range(len(test)):
    perp = find_perp(test[i] , train_freq ,total_tokens ,'kn' )
    
    print(i,stmts[i] ,perp)
    print()
    
    tech_corpus_kney.append([stmts[i],perp])

0 Now , why do we started our discussion on chain polymerization ?
 140.42836798135795

1 Now , this anionic species or cationic species , they are not present alone in the medium , they are always associated with or accompanied by a counter ion .
 901.57714519276

2 This ketone loop complex form complex with this ionic species and then once they form complex , the reactivity of this ionic species are lost .
 803.9925361910372

3 So , as the reactivity of the ionic centre , obviously , the free is the ionic center it gets , it becomes more reactive .
 412.01221367703585

4 And if you compare with radical chain polymerization , solvent polarity actually has no such role to play in case of radical chain polymerization .
 56.7730826903011

5 Because they have seen this propagation species are similarly charged , they cannot react with each other and terminate the chains .
 1683.8421692625623

6 As a result , there is a huge increase in the concentration of these active centers in the medi

47 Bulk of the immobilization enzyme product will be in the form of carrier .
 511.35398956458477

48 This will require many of the functional groups to be present on the carrier .
 41761.23599191509

49 Large varieties of carriers have been used and are available for use fortunately and these carriers have been classified by various people in different modes , in different forms .
 491.69278365395724

50 The non - porous carriers will have a low specific surface area because the porous matrix will provide lot of internal surface in a matrix .
 140.01740146235653

51 Then in the case of proteins almost like what we talked about gel structure , collagen and gelatin are typical examples and there is a whole range of synthetic polymers which have been reported and used for immobilizing enzymes which are usually polystyrene based , vinyl polymers , polyacrylamide and so on.You can classify and a large variety of carriers are available for use for immobilizing enzymes and the choice will de

101 One category contains the factor which becomes fixed once an immobilized enzyme is prepared .
 182.56199697253194

102 Similarly the catalytic surface to volume ratio is also fixed once an immobilized enzyme preparation is available .
 1538.1806029267918

103 You can write down the expression ln S0 / S 1 k ’ 2E0t + = S0 - S Km K ’ m ( S0 - S ) From the equation two if you just simplify you get a straight line equation which can give the values of Km and vm .
 372.07281078924706

104 Here I must again put a caution .
 713.321083105547

105 When we used the Michaelis Menten equation you will recall that we had made certain assumptions and the assumptions were that we are talking of the initial reaction rate .
 167.90634452538578

106 Even while looking at the kinetics of the soluble enzyme we always look at the situations when it comes to extreme substrate conditions .
 111.35671295413309

107 Km / S0 will be large and under that condition you must assume a first order kinetics and i

153 If for example , if the monomer is electron donor it will have more tendency to react with a solvent which is electron acceptor , whereas if the monomer is a electron acceptor it will have a tendency to react with electron donating solvent , for let us compare between two monomer and two solvent and see whether , we can justify or we can have this verified from the experimental data .
 7673.298058025894

154 So , you can justify the values for methyl methacrylate , which is again a electron accepting monomer .
 1811.4653123566306

155 That means , this solvents are very poor solvents if you want to build higher molecular weight in the radical polymerization .
 202.1167467930991

156 Now , this is a typical example how this common monomers are shift or store , because most monomers when there are shift from between different countries are locally there are stabilized with small amount of this inviters .
 46.68658272084923

157 So , it is a example of a retardation there would be fou

204 When you take lower substrate concentration , 0.2M the activity loss is much , much higher and over the same period of time instead of 50 % it reaches to about 25 % and this is the case where if you recall we discussed the substrate dependent enzyme decay .
 774.5208514622101

205 We discussed two cases of enzyme deactivations during continuous operation of the reactor one was substrate independent where in the earlier case of aminoacylase substrate concentration plays no role and the deactivation constant is predominant but in the case of the aspartase , it is substrate dependent deactivation and higher the concentration of the substrate the operational life is comparatively better .
 298.07199843618724

206 In fact most of the products for example the beverages particularly Pepsi and Coke they are based on glucose fructose syrup ; they don ’ t use sucrose as a sweetener because of the use of glucose and fructose syrup in their parent countries their processes need glucose and fru

243 This is what it does .
 5.948100153898909

244 If you go back here , each polymer segment , if all of these are , say for argument , all these neighboring cells has solvent molecules , and then it will have z contacts .
 497.62182324498076

245 So , that is , what is the chance that each segment would be giving up a polymer solvent contact .
 166.25296769078864

246 So , we can write del h m is chi n 1 phi 2 into k T .
 253.38370760829258

247 Parameter is basically gives you the difference when a solvent molecule is taken out from a solvent environment and put in a polymer environment .
 10010.874573161302

248 One will be transport of the substrate from the bulk to the surface ; from surface the substrate has to transport to the site of the enzymatic reaction which means the enzyme molecules that are located throughout the immobilized enzyme particle and then the reverse process that means the product produced as a result of biochemical reaction has to transport back from the car

298 It could not only be packed bed ; it could also be stirred reactor in which the enzyme is added and the enzyme solution is allowed to be stirred along with the carrier and after a period of time when the supernatant enzyme activity has come to minimum value you stop the process .
 1436.0611050292284

299 This was originally developed by British group Barker , Emery and Novais , one of the references I have sited here , in seventy one .
 25.861287793317633

300 So the time of contact is very important and it is not used mostly more than five minutes and if you take a dilute solution probably you can go higher , followed by filtering and drying of complex at 45ºC .
 534.2464109618317

301 And these preferences will always go up if you increase the temperature .
 13.151668932357401

302 Now , how can we make sure that this counter ion and the active chain end are tightly bound ?
 289.3631792347012

303 So , this condition will satisfy that the carbon ion chain end and the lithium coun

342 Now , it is N A into P .
 222.12016028824323

343 So , total number of functional groups remaining unreacted is sum of these two .
 17.432793086683713

344 So , you need to know the concentration of each monomers very well .
 41.4912101688488

345 So , what is typically done ?
 39.16660397027444

346 One is to one plus these things plus some mono - functional monomer which is chain stopper .
 583.6669648371958

347 X bar means x n is x x bar .
 938.9514031652063

348 3 A group reactant is P cube .
 3.293034368379565

349 It is the number of molecules having in mar .
 50.4317333153394

350 So , how we will look at , how the weight fraction is distributed ?
 6863.901338508059

351 So , we can get in x by x N x total .
 2775.7329460325004

352 We have seen earlier , we can put that value and we can get this expression .
 218.46472606020507

353 These are the timer dimer which is present in maximum number .
 18.614230205420597

354 It is weight fraction .
 1501.9436246019368

355 1 , 1

400 So , if there are anti - stat additives were not used during the normal usage of the plastic material , the static can static charge can generate on the surface , which might give lot of hazard to the user .
 651.7818521001662

401 So , basically this type of may this additives make the surface self - cleanic so or easy cleanic , the cleanic of the surface is done easily .
 180.89344558991021

402 Now , if you look at the steps or the process of what happened when a polymeric surface , or any for the matter fact , any other external surface come in come in contact with biological fluid , the first step what happen , proteins present in the bio fluid , they get absorbed on the external surface .
 916.4527782689149

403 So , basically , these are the attributes , surface active additives must have to be successful in any application .
 1285.1335304173192

404 And then we last few lectures we discussed the applications of polymer , in the sense that , the different polymers properties

448 But for polymers because they cannot be vaporizable vaporize or polymers are not vaporizable , you cannot experimentally get the value of solubility parameter for the polymer because they are large in size .
 1718.5213248686239

449 If the where that interaction is maximum , the maximum swelling happens .
 123.57612654186097

450 And cohesive density energy is nothing but measure for the interaction or attractive interaction between the molecules .
 145.5788154831306

451 So , like this you can get the value for this interaction parameter for any of the polymers because the value of this are supplied by available in the literature .
 218.9700755060475

452 If you look at the polis polystyrene obviously polystyrene will not soluble in water and methanol , but it polystyrene almost soluble in most of this solvents .
 156.3403568757586

453 When the soluble enzyme is immobilized on to a matrix the interaction between the matrix and the enzyme amino acids might lead to certain conforma

500 E * is energy of activation .
 2.5622629005356288

501 The term E * can again be separated into both enthalpic and entropic contributions E * = ?
 472.31581120460885

502 It is possible to carry out the reaction .
 143.39183683801426

503 Very often a comparison of the first and the third case is not logical because the reaction conditions are different and comparing the turn over number may not be a very reasonable proposition .
 5032.284102407924

504 The catalyst might be able to perform the catalytic function by any other mechanism : by acid base catalysis or covalent catalysis which we will discuss later but the contribution made by stabilizing the transition state of the substrate will not be possible to be provided by the chemical catalyst which enzyme is able to provide and there by it increases the stability of EAB * and thereby reduces the energy of activation and leads to the formation of the product .
 3274.1529873210065

505 As a matter of fact practically there are ha

543 There will be another pore on this side of the same dimension so whatever dimension of the particle we are considering let us say if it is a pallet of thickness twelve we consider l as the thickness because the substrate will be accessible on both the sides .
 148.19074803367013

544 The substrate concentration profile for such a second order differential equation is usually obtained interms of a Thiele modulus .
 281.0165335749554

545 In the case of Michaelis Menten kinetics if we put the v ’ term which is a non - linear expression the analytical solutions are difficult and usually you approach numerical solutions which have been also reported in literature .
 101.38003648972541

546 Smaller Thiele modulus means the rate of diffusion is smaller .
 20.633893840331268

547 On the other hand for the first order reactions the Thiele modulus is independent of substrate concentration because 1 - 1 is zero and so it becomes independent of substrate concentration .
 3206.3169315537716

5

597 And in this case , we are seeing that the molecular goes up then critical temperature also comes down .
 2878.845330627224

598 So , that is another observation we can make from this expression .
 611.7376598983684

599 Now , that is because that that because the polymers are long chain and you have there is a numerous possible confirmations , because you have a rotational freedom across the backbone and if the polymer chain rotates across the backbone , obviously you will get different chains different confirmations and the backbones are quite flexible in case of polymers .
 340.70363877620343

600 So , that is the result you get , so many , so many confirmations .
 363.0621765203888

601 We are using the terms size and shape .
 15232.0863754056

602 We have talked about shape , it could be random like , worm like looks , like rod like and most cases , we deal with their enough flexible to become closer to a random coil .
 5138.917462295898

603 So , we will deal , we will discuss

649 You can land up in same polymer along with different small molecules or it can be , these polyamides can be synthesized by self condensation amino acids which will make polypeptides .
 1750.3911353137005

650 Now , what in addition ?
 92.33699701100824

651 Most addition polymer have a backbone of carbon , so according to the definition of addition polymers , it should be addition polymers and not the original definition of addition polymers .
 3278.4452023569174

652 This is the back bone .
 624.8187054253073

653 So , you need an initiator , whether the initiator is radical or cation or an anion , it has to initiate the chain .
 1455.1507191309458

654 Now , this dimer , the choice of reacting with itself or has a choice of reacting with any of the two monomers , if it reacted with this , so you get a BB , again AA .
 547.7937786021823

655 What you are getting is the polymer chains are forming in steps , dimer , trimer and tetramer and because these monomers can react this funct

688 So , the interaction will be between polymer and solvent , dilute solution will be interaction and polymer and solvent , now if there are if there are favorable interactions between the solvents and the polymer , compared to a solvent solvent interactions .
 3119.356579050733

689 So , incase of poor solvent , a single polymer chain is tried to , because of excluded volume effect tried to expand whereas , polymer solvent interaction , because they are unfavorable to compact .
 1287.2163333637795

690 So , the real dimension of polymer chain becomes same as the dimensionless unperturbed that chain ; that means it is not perturbed by any other polymer or solvent .
 1056.6502115354751

691 So , theta solvent again , so theta solvent and if your amorphous polymer , the dimension of a real polymer chain is same as the dimension of unperturbed chain .
 6629.974672884618

692 And chi the polymer solvent interaction is be lower than 0.5 , they are all inter limited , for theta solvent chi 

738 In this case if you poly A , and a block poly B , then you can actually used these copolymer , block copolymer as a comfortabalizer , between poly A and poly B , and by doing this you can actually improve the property of the blend , of poly A and poly B .
 825.247314492969

739 So , now the copolymers which will forms at the later part of your polymerization copolymerization , because the monomer composition in this case , the ratio of M 1 M 2 or different from beginning numbers .
 5536.206721209854

740 Now these monomers , actually do not undergo homopolymerization by itself , we discussed about that , and the reason for that , but they can undergo copolymerization , in fissile by , with other monomers like styrene ethynemethyne and… So , if you have the data , reaction rate data from the homopolymers ; that is not useful when you predict the composition or the reaction rate , when this monomers are reacting with the second co monomers to form a copolymerization .
 3059.111841412

773 Now , M e you can get , again T is given , T c is given , so that is your temperature reaction .
 1084.9825727303573

774 For styrene it is 310 degree centigrade , for alpha methane styrene is 61 degree centigrade , for styrene is 220 degree centigrade , what does it mean ?
 898.8106982613771

775 If you do it at lower temperature , then M e value will come down and reaction will proceed till that equilibrium value of the monomer concentration .
 571.5035148533394

776 And reverse history , if we know the values of del S and del H , we can find out the relation between the M e and T c .
 2449.3752216222642

777 Now , how does these numbers , del H and del S , vary with different monomer ?
 94.75930824389981

778 The second thing after getting the rate expression for those different mechanisms of the bi - substrate enzyme catalyzed reactions is the procedures by which one can discriminate between different mechanisms .
 322.36486039261627

779 Also show the interaction between the t

821 So what we will do in the next lecture , we will continue this , the other types of initiator , and including the remaining example of , the redox , the possibilities of redox initiator .
 78.23929657116676

822 ‘ A O C O O A ’ , I just give it example here , and B O C O O B and you can have A O C O O B .
 143.29425225554266

823 What solid by state , I mean that it is not flowing in the condition where we are using .
 297.22903636808564

824 And amorphous , which is hundred percent amorphous , zero percent crystalline region .
 23.426232978882158

825 We talk more about polymer crystallinity .
 1431.8618866718252

826 How do you induce crystallinity in the material ?
 1300.1816296808925

827 And basically what we get when we heat the polymers ?
 147.95687112514955

828 When a stretch , like stretch a amorphous polymers , then the amorphous polymer align just because it is between stretching the polymer .
 134.3990399443077

829 So , final state is a non - equilibrium state .
 39.1

872 This course as you know , deals with one of the largest group of biomolecules what we know as enzymes .
 79.60460239419277

873 We are familiar with many of the diseases which are seen when one of the enzymes stops functioning .
 239.05922982140444

874 The most significant part is that they regulate the various functions of the cell in such an accurate manner probably no other control system in any chemical process plant can think of .
 1492.4976143136541

875 So obviously their cost per unit quantity is comparably very high and coupled with their stability it becomes an expensive proposition .
 89.42859675611304

876 The paradox remains .
 1.4310805080085798

877 On the other hand you also have a very clear understanding of the genetic machinery of the cell , the microbial cell that controls the enzyme synthesis , which you will manipulate leading to hyper production of the enzyme .
 301.72930111834074

878 In isolation and purification one of the very characteristic features is 

915 A very important factor is amount of water present .
 11565.089752072217

916 In fact for calibration of the humidifiers we use saturated salt solution and each salt , when you make a saturated salt solution has definite relative humidity and those salts are circulated so as to maintain a constant water activity in the reactor and even control water activity .
 1363.8677848534999

917 They have tried to mimic the activity of chymotrypsin ; they have named it also benzyme and some of the functional groups are covalently coupled which are functional in the case of catalysis and such a complex is called a synthetic enzyme .
 191.9688363656046

918 Hence K s c would be given by e to the power del S 0 p p by R .
 1006.4966887283322

919 So , we can simply write from these two , V is V 0 plus V i e to the power minus A s L dash by 2 .
 111.31919299341676

920 So , the output signal in this detector is proportional to the concentration and if we assume that detector response is proportion

955 The other examples , but I am not spending much time on discussing those applications here .
 1574.942330015844

956 Now , the feasibility of this polymerization from a ring to linear product linear monomer to polycyclic monomer to a linear product , like all other reactions depends both on thermodynamic and thermo dynamic and last kinetic factor .
 213.1459598479574

957 Basically this is saturated ring and there is no place where u can carry out a electrophilic or neutrophillic character by external and open the ring , and then carry out the polymerization .
 89.51940306146012

958 So , basically all have a hetero atom in the ring , whether it is a or ester or amides whatever is the case .
 671.2424836440657

959 Now , poly ester poly amides are typically synthesized we have discussed earlier they are synthesized by stiff polymerization , but in this case we are making the same types of polymers , which we knew till now that they are typically synthesized by step growth polymeriz

999 The disruption of the non covalent interaction is good enough to lead to denaturation .
 76.30637000052297



In [83]:
tech_corpus_wb = []

for i in range(len(test)):
    perp = find_perp(test[i] , train_freq ,total_tokens ,'wb')
    
    print(stmts[i] ,perp)
    print()
    tech_corpus_wb.append( [stmts[i] , perp])

Now , why do we started our discussion on chain polymerization ?
 4.133339291740594

Now , this anionic species or cationic species , they are not present alone in the medium , they are always associated with or accompanied by a counter ion .
 11.351944386557843

This ketone loop complex form complex with this ionic species and then once they form complex , the reactivity of this ionic species are lost .
 11.378820658431678

So , as the reactivity of the ionic centre , obviously , the free is the ionic center it gets , it becomes more reactive .
 9.67806224148965

And if you compare with radical chain polymerization , solvent polarity actually has no such role to play in case of radical chain polymerization .
 5.546744640012036

Because they have seen this propagation species are similarly charged , they cannot react with each other and terminate the chains .
 10.314919392706834

As a result , there is a huge increase in the concentration of these active centers in the medium and as a 

Bulk of the immobilization enzyme product will be in the form of carrier .
 15.00535159559924

This will require many of the functional groups to be present on the carrier .
 12.165453223114552

Large varieties of carriers have been used and are available for use fortunately and these carriers have been classified by various people in different modes , in different forms .
 10.685439630017818

The non - porous carriers will have a low specific surface area because the porous matrix will provide lot of internal surface in a matrix .
 5.110066956515605

Then in the case of proteins almost like what we talked about gel structure , collagen and gelatin are typical examples and there is a whole range of synthetic polymers which have been reported and used for immobilizing enzymes which are usually polystyrene based , vinyl polymers , polyacrylamide and so on.You can classify and a large variety of carriers are available for use for immobilizing enzymes and the choice will depend upon variou

You can write down the expression ln S0 / S 1 k ’ 2E0t + = S0 - S Km K ’ m ( S0 - S ) From the equation two if you just simplify you get a straight line equation which can give the values of Km and vm .
 10.114656862687113

Here I must again put a caution .
 11.605102592686553

When we used the Michaelis Menten equation you will recall that we had made certain assumptions and the assumptions were that we are talking of the initial reaction rate .
 6.642329488584853

Even while looking at the kinetics of the soluble enzyme we always look at the situations when it comes to extreme substrate conditions .
 10.233911783532607

Km / S0 will be large and under that condition you must assume a first order kinetics and if the value of Km / S0 decreases the error will continue to increase .
 9.570380549167165

In the continuous stirred tank reactor one is a continuous flow reactor .
 14.34587008315085

Similarly here also if you make assumptions of zero order or first order situation when K ’ m 

That means , this solvents are very poor solvents if you want to build higher molecular weight in the radical polymerization .
 9.565202434836607

Now , this is a typical example how this common monomers are shift or store , because most monomers when there are shift from between different countries are locally there are stabilized with small amount of this inviters .
 7.997657972467989

So , it is a example of a retardation there would be four possible , which is a combination of inhibition and retardation something like this a starts at a sometimes as well as the reaction is slower , this happens if we add 0.2 percent nitro benzene .
 9.561575275672533

Now , we get it once s more time this the expression for k p is just for your reminder that this is this .
 12.428001753960245

So , we know this now from the experiment and we can find out the concentration of aim as well , so this k p and M dot are the two unknown quantities .
 9.280741838811583

Now , we know R I is minus d I by d 

In fact most of the products for example the beverages particularly Pepsi and Coke they are based on glucose fructose syrup ; they don ’ t use sucrose as a sweetener because of the use of glucose and fructose syrup in their parent countries their processes need glucose and fructose syrup .
 7.682073004944973

And , in this lecture , this tenth lecture , I will try to complete the discussion on the different type of radical initiator , which we began in the last lecture , and briefly talk about the initiator efficiency .
 17.029300202142707

For example , I have molecule like this , or you can have benzyl ketals , benzyl ketals where you have structure of that like this .
 6.20429087618215

These are the main type of photo initiator or photo chemical initiator used in commercial applications .
 4.603424939636411

So , it is , it is very , very useful to application , like film , where you can cover a part of the film , and you can keep part of the film uncovered ; where if you sign ligh

An important feature of the performance of immobilized enzyme reactor is that one needs to identify the limiting rate , the step which has a limiting rate the lowest reaction rate .
 24.319047632942695

That means the kind of reactants we have used in immobilization , the kind of matrix we have used in immobilization and whatever the interaction between the matrix and the enzyme under the conditions of immobilization , whatever conformational changes have taken place , whatever kinetic parameters have been modified as a result of this interaction are considered in the inherent parameters and we define them as ví = kí2E0S / Kím + S The parameters are just given prime and the inherent parameters have no role to play or not much can be done during the reactor design or reactor operation .
 11.708061069572418

l . When you say F is equal to one , there is no effect on the kinetic parameter Km or there is no diffusional limitation in other words .
 7.329347294664706

l . Under what conditio

So , this condition will satisfy that the carbon ion chain end and the lithium counter ion are tightly bound , strongly bound each other .
 16.907917154418485

So the monomer , incoming monomer can add to the chain from only single side .
 17.7462902275377

That , which we get , if we do reaction in the nonpolar medium at low temperature and with a , as a small counter ion , then there will be a possibility of a strong coordination between the monomer and the active chain end and the counter ion ; which will makes sure that the monomer , the incoming monomer gets into the chain from one side and which will produce a isotactic polymerization , isotactic polymers .
 14.807821919758116

We can use m m r techniques to find out the fraction of m , r or m r in the… All this we can find out quantitably from m m r experiment .
 11.03303360545082

What if… he used instead of ethylene , he has taken say alpha or say in this case propylene and used the same sort of catalyst ; alkyl aluminum plus 

It can never be more than 2 .
 3.409875594163281

That means , it will be more broader .
 8.659703801148861

6 is the least .
 5.286218722808483

The reaction may stop as soon as some concentration of the product and hydrogen ions are built up .
 23.414988887791058

In addition to that their effect on the ionization behavior and ultimately the rate effect and putting both the things together can give you a confirmatory result or conclusion on the identification of the amino acid residues on the reaction rate .
 6.990599937202895

A profile like this shows maximum value over a pH range , not a very broad range but at least in one or two units .
 8.337888964716091

One of the amino acid is ionizing and the other is not in a very highly ionised state and therefore you got a broad pH .
 7.491964563087561

Under these circumstances , the concentration of EH will be EH = E - .H + / K and the total enzyme concentration E0 = EH + E - E0 = EH + K.EH / H + = EH ( 1 + K / H + ) E0 EH = = E0.H + /

Thank you .
 1.5191913289328294

Similarly in the case of first order regime it is again the same thing that the K ' m is usually greater than 10S0 ten fold increase usually gives you a reasonable performance towards a first order regime As you have noticed that in the case of zero order regime the reactor performance is identical in both the cases in CSTR and plug flow reactor excepting that the magnitude of the tau is altered by epsilon that is voidage .
 12.388120199993839

They are any units ; don ' t bother about units .
 7.297382887126689

But these are relative ratio ; so the 1.24 .
 14.346105824053309

If the enzyme loading is constant , then the tau will also have the similar ratio because in our reactor performance equation the left hand side is K2E0 ?
 18.06891195023158

I was trying to relate productivity and the quantity of enzyme or the space time .
 4.339988712714518

The cost of the product is not very high so productivity becomes very important parameter .
 4.365998748

pH as equal to ?
 4.211780647620907

pH = 0.43e ?
 2.0613716633174346

If you notice the shift up to what I have already told you see pH effect change .
 9.360305345480342

An alkaline pH for isomerisation of glucose is not a desirable feature because it will undergo a lot of non - specific , non - catalytic , non - enzymatic reactions on glucose isomerisation producing products other than fructose .
 10.519817861115804

The change in km value can be attributed to diffusional limitations as well as partitioning of substrate .
 7.856666702686647

This has to be taken care while preparing the immobilized enzyme preparation so that the km value does not increase because otherwise it will lead to a less sufficient catalysis .
 7.249007066144561

If you look at soluble starch as substrate as a function of particle size , particle size being in micro meters the km value increases as the particle size increases .
 9.090353086233783

The other parameter which is important in the case of kineti

So , there will be drop in transparency , but it will not , may not be opaque .
 2.9176347129784723

So , higher is the refractive index , lower is the maximum transparency , one can get , when a light pass through a polymeric material , but you one can never get this high amount of transparence through a polymeric material , because there will be some in homogeneity in the refractive index the medium , even this for a homo polymer , and that will cause some amount of loss of flask of , the incoming light or transmitted light , due to scattering .
 13.286348819724264

You can actually deep that plastic material , in a liquid or similar refractive index .
 9.607804684658314

In between there are few polymers , which highly conjugative polymers , they have conductivity in the range of semi conductor , and they are called conducting polymers , and they basically not the commodity , or they are not produced in large amount .
 8.576306615619286

So , barriers polymers are having low permeab

And you know for large N , large value of N i Stirling ’ s approximation gives you ln N factorial ; this is from your previous mathematical knowledge .
 6.523289167858917

So , you have to understand that there are already i number of polymers present which means i into x number of cells are already occupied .
 8.432629130047534

So , this is the number of cells where we can place the next polymer molecule which is i plus 1th , i plus first .
 12.411020948263657

So , N 0 minus x i is the number of cells which are available .
 4.226954009042779

So , second segment can be placed in any of this z cells .
 4.351333557004933

Now , what I will do ?
 3.200498068616971

I will quickly go through the mathematical part , not plus this is minus .
 7.7092857148972955

Now z , because z is quite high so I can again put z equals to minus z minus 1 .
 7.276183303107518

So for simplicity what will be the i th chains , number of ways i th chains can be placed ; N 0 x i would be i plus 1 is i .
 10.

Ideally it should come out in a very short period of time like we have put it here but it comes out in a broadened fashion .
 6.749270002958628

After time zero as soon as you put in the tracer , there will be a dilution or mixing taking place and the tracer will get diluted and from zero concentration it will come to a concentration finally all the tracer will come out .
 10.737093324229583

The fraction f2 of the total volume of the reactor is represented as a dead zone .
 8.97296186402033

f2 is the fraction which is in the mixed zone as per this mass balance .
 16.385709822048803

distribution C / C0 , you will get a different kind of a profile from which the parameters f1and f2 can be calculated .
 7.747074285164175

The slope will be - f1 / f2 .
 3.6389404312220415

Their order of complexity increases on the basis of the number of parameters considered .
 7.275637618250344

Now , we have talked about different types of copolymers , random copolymers , copolymers , alternate copol

So , let us continue with the next page which means x n .
 7.410535166557968

You have to increase .
 8.429166151768921

In one of the last lectures or previous lectures , so in the synthesis of polycarbonate form , this phenol , a using phosgene .
 10.398124708549378

For example , you have to make sure that the functional groups are available during the entire reaction , so that some cases what happen you know these are exceptional case , where when you build a molecular weight after some size , it might feasibly turn the solution or from the melt of the reaction .
 10.791733205966173

They stay in the solution or in the reaction medium all the time during the reaction and here also , there are some exceptional cases that this equal reactivity or the functional group does not apply , but we are not talking about those cases in this course because they are exceptional cases and most of you know all those common reactions and the reactions we come across always follow this rule of a eq

So , that symmetric carbon should have some optical activity .
 3.324560740748769

Now , that happens in case of biomacro molecules where you have exclusively one such conformation present in the sample .
 5.246848818435627

So , that is too possible if you can add ; if you think of adding another one , you can have another two possibilities and so .
 16.58375763539578

So , in this case you expect that this will show optical activity unlike , say , molecule like polypropylene and indeed they do show optical activity .
 9.891573153153658

These types of polymers actually show optical activity .
 6.464637043913178

Now if it is carbon anion , then ideally it should be s p 3 but because you have a substituted group substitution here R , now we know this the monomers which undergo anionic polymerization , they actually get stabilized by resonance with this substitution .
 9.741136614388699

So , no special preference of or hindrance from one of the other side ’ s ; let us wipe it off to a

This has this , the propagating anion from cyanoacylate is highly stabilized , so you can actually anionically polymerize these cyanoacylate monomers with a weekly , very weak nucleophiles .
 6.045625566013447

Now , you can even functionalize this by such reagent and here you can make a polymer with a functional group at the end , so it can be utilized .
 6.232499914375179

So , I can write now , this is a dead chain , so the chain gets terminated and this is a weak nucleophile , as we discussed earlier .
 13.286151348284122

You take a bulky anion initiator , so that spherically , this reaction , this substitution reaction becomes problematic .
 4.3065172434221495

And as you can now visualize , that all the chains , now after initiation is complete all the chains will grow simultaneously under identical condition .
 11.121868182218133

So , we can write monomer concentration and the concentration of all the anionic , which is present in the reaction medium .
 4.60468694432415

In ca

We need to make few polymer solutions dilute .
 10.77116939682731

Otherwise those molecular weights will not contribute in the osmotic pressure and hence the determined M n number average molecular weight .
 4.65153739203935

So typically , if a polymer sample has quite a few fraction of low molecular weights which are below the cutoff of the membrane , then the M n or the number average molecular weight determined by this method typically becomes little lower estimated .
 8.919850300516606

Let us move on and try out ; some other points which we must just to add some more points in this membrane osmometry that the membranes are different for different solvent .
 9.417705946286457

So , basically we started with a VPO vapor pressure osmometry .
 7.709375004880981

Now because T f is very close to T I , we can simply write this expression at P 1 by P 1 0 as minus del H vaporization delta T e by R T i square .
 16.88823156212376

So , we can write like this RT .
 2.67955147263384

The r

In a couple of minutes you get a polymeric network of chains .
 10.820484249426853

These are the chains which are shown in the dotted lines and indicate the cross linkages which are vertical to the plane of the paper So the three dimensional network is formed and enzyme molecules are entrapped within the interstitial spaces which are created by the cross linkages that are maintained and you get a smooth solid block in the form of a gel .
 10.409965978222766

Here some of the cross linkages are shown ending with the dots .
 4.9529651994332315

Another group of entrapment methods , gel entrapment method rather , is from the long chain polymers .
 9.236726790535014

Being a structural protein , you get a real dispersion of the enzyme .
 7.49773927842745

You just add the cross linking agent after a film has been made .
 4.026198142778638

When it passes through toluene after residence time in toluene , the polymer cellulose triacetate gets coagulated ; it is no longer soluble and then th

So , in most cases it is preferred , it is done , that at the end of the reaction some additional steps are done to strip off the cyclic molecules , small cyclic molecules from the final product , so that when you take that polymer , which is in the , at the end of the reaction , which you are taking forward for molding process , which will be , have least amount of cyclic , cyclic molecule present .
 10.06622767548948

In case of absence of any cyclization , just pure linear , linear chain formation we have seen the Carother ’ s , Carother ’ s equation from earlier lectures , that X n is 1 , 1 over 1 minus P .
 7.967989636915103

Now , one thing also we should remember in case of step polymerization , that the chains , when the chains are forming during the reactions , the chains can , there is a possibility , that chains can also undergo exchange reaction between them and making the chains reacting with each other .
 8.324349110134603

But when you are starting with the monomers , it

And from this distribution the different molecular weight like number average molecular weight , weight average molecular weight and other molecules can be directly obtained .
 9.655201066769592

So , just all this benefits , but the only disadvantage is that the price of MALDI TOF is very high .
 5.591961093417286

The two factors on which the classification of enzyme reactors can be based are on the mode of charging and discharging of substrate and product stream and also physical configuration .
 17.279155074381556

There are options where even soluble enzymes can be used in a continuous reactor but for practically most applications the use of soluble enzyme is usually with the batch reactors .
 7.981614423708672

In the example I cited a little ago on the hydrolysis of penicillin to 6 - APA because of simultaneous acid production you need to control the pH otherwise the enzyme gets inactivated and a very precise control of pH is needed and therefore addition of the alkali simultane

The conformational change leads to the dissociation constant or Michaelis Menten constant for the second binding site and then second binding site also gets bound .
 8.07894143833982

This K ’ is different than the Michaelis Menten constant .
 6.166973700818084

Or S0.5 substrate concentration for 50 % maximum reaction velocity is nvK ’ .
 3.6856212471936685

Now , how to get more information with we can deduce from this .
 4.95723060871133

So , this curve now is for theta is equals to 0 and this is for c equals to 0 .
 8.272358468685004

Why we extrapolate ?
 2.189647285803901

So , this is known as zimm plot ; this is very unique way of plotting this three dimension 3 d data into two dimensional using the help of a arbitrarily chosen constant and we can get this information from this zimm plot .
 5.631431060600244

One is the solvent and the solute solution all the polymer solution should be absolutely dust free ; otherwise , the dust particles will scatter light that will interfere

In [84]:
with open('tech_corpus_kn.pkl', 'wb') as f:
    pickle.dump(tech_corpus_kney, f)
    
with open('tech_corpus_wb.pkl', 'wb') as f:
    pickle.dump(tech_corpus_wb, f)

In [85]:
with open('tech_corpus_kn.pkl', "rb") as input_file:
    tech_corpus_kn = pickle.load(input_file)
    
with open('tech_corpus_wb.pkl', "rb") as input_file:
    tech_corpus_wb = pickle.load(input_file)


### Avg Vals 

In [6]:
kn_avg = 0

for i in tech_corpus_kn:
    kn_avg += i[1]

kn_avg/1000

10.436678603971208

In [7]:
wb_avg = 0

for i in tech_corpus_wb:
    wb_avg += i[1]

wb_avg/1000

13.19003452960443

In [2]:
with open('tech_corpus_kn.pkl', "rb") as input_file:
    tech_corpus_kn = pickle.load(input_file)
    
with open('tech_corpus_wb.pkl', "rb") as input_file:
    tech_corpus_wb = pickle.load(input_file)
    
    
with open('health_corpus_kn.pkl', "rb") as input_file:
    health_corpus_kn = pickle.load(input_file)
    
with open('health_corpus_wb.pkl', "rb") as input_file:
    health_corpus_wb = pickle.load(input_file)

In [11]:
def find_avg_perp(l):
    temp = 0

    for i in l:
        
        if i[1] != np.float('inf'):
            temp += i[1]

    return temp/len(l)

In [12]:
find_avg_perp(health_corpus_kn)
find_avg_perp(health_corpus_wb)

7.2160953379390955

In [14]:
find_avg_perp(tech_corpus_kn)
find_avg_perp(tech_corpus_wb)

9.657405143189328

In [21]:
f = open("2019201017-LM3-test-perplexity.txt", "w")  

for i in tech_corpus_kn:
    string = i[0].split('\n')[0]
    
    if i[1] == np.float('inf'):
        perp = 'inf'
    else:
        perp = str(i[1])
    
    final = string + '\t' + perp + '\n'
    f.write(final)
    
    print(final)


TypeError: list indices must be integers or slices, not list