In [None]:
from google.colab import drive
drive.mount('/content/drive')
import os
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

### To generate the dataframe

In [None]:
# Empty results frame: an 'ID' column followed by one column per snapshot year, 2013 through 2023.
d = {column: [] for column in ['ID'] + [str(yr) for yr in range(2013, 2024)]}
df = pd.DataFrame(data=d)

In [None]:
# Directory holding the snapshot files. Later cells append file names directly to this
# string, so the trailing '/' is required.
path='/content/drive/MyDrive/CSE256_SP23/AUP_project/snapshots/' #input your path here to the data

In [None]:
#get the names of all the files, leaving out 'snapshots_url.csv' (presumably the URL index, not a snapshot).
#Filtering instead of list.remove() means a missing csv no longer raises ValueError.
filesnames=[name for name in os.listdir(path) if name!='snapshots_url.csv']

In [None]:
#input:list of file names (and, optionally, the text encoding to decode them with)
#returns: list of contents of those files
def content(to_analyze, encoding=None):
  """Read every file in ``to_analyze`` and return their texts in the same order.

  Parameters:
    to_analyze: iterable of file paths.
    encoding: text encoding passed to ``open``; the default ``None`` keeps the
      platform default.

  Returns:
    list of str, one entry per input path (an empty file yields '').

  Raises:
    OSError: if a file cannot be opened.
  """
  documents=[]
  for file in to_analyze:
    # the with-block closes the file on exit, so no explicit close() is needed
    with open(file, encoding=encoding) as f:
      documents.append(f.read())
  return documents

In [None]:
#input: the current file ID and all of the file names
#returns: array of files ordered by year -> 2013, 2014, 2015,...2023
def sort_id_files(n,id_file_names):
  """Return the files of one ID sorted by the year encoded in their names.

  File names are expected to look like '<dir>/<n>-<year>.txt'.

  Parameters:
    n: the ID as a string, exactly as it appears in the file names.
    id_file_names: full paths of the files that belong to that ID.

  Returns:
    A new list with the same paths in ascending year order (the input list is
    not modified). Each path appears exactly once.

  Raises:
    IndexError: if a name does not contain '/<n>-'.
    ValueError: if the part between '/<n>-' and '.txt' is not an integer.
  """
  def year_of(file_name):
    # the year is whatever sits between '/<n>-' and '.txt'
    return int(file_name.split('/'+n+'-')[1].split('.txt')[0])
  # Sorting on the parsed year avoids matching the year as a substring of the
  # whole path, which picked up the wrong files whenever the ID or a directory
  # name contained the same digits.
  return sorted(id_file_names,key=year_of)

In [None]:
#input: the current file ID and all of the file names
#returns: dictionary of cosine similarity between the earliest seen file (usually 2013) and every subsequent file, where  key:value is year:cosine similarity
def id2change(n,id_file_names):
  """Score how similar each yearly snapshot of one ID is to its earliest non-empty snapshot.

  File names are expected to look like '<dir>/<n>-<year>.txt'.

  Parameters:
    n: the ID as a string, exactly as it appears in the file names.
    id_file_names: full paths of all snapshot files of that ID.

  Returns:
    dict with an 'ID' entry plus one '<year>' entry per non-empty snapshot. Each
    year value is the TF-IDF similarity to the earliest non-empty snapshot,
    as a percentage rounded to 2 decimals. An empty dict is returned when every
    file is empty.
  """
  sorted_file_order=sort_id_files(n,id_file_names)
  docs=content(sorted_file_order)
  # Keep every non-empty document together with its file name, so the row index in
  # the similarity matrix is known without re-reading the files or searching by text.
  non_empty=[(name,doc) for name,doc in zip(sorted_file_order,docs) if len(doc)!=0] #take out empty files
  to_add={}
  if len(non_empty)>0:
    tfidf = TfidfVectorizer().fit_transform([doc for _,doc in non_empty])
    # no need to normalize, since Vectorizer will return normalized tf-idf
    # '@' is the matrix product for both scipy sparse matrices and sparse arrays
    pairwise_similarity = tfidf @ tfidf.T
    arr=pairwise_similarity.toarray()
    # the caller already knows the ID, so it does not have to be parsed out of the path
    to_add['ID']=str(n)
    for idx,(name,_) in enumerate(non_empty):
      # same year parsing as sort_id_files: the text between '/<n>-' and '.txt'
      y=int(name.split('/'+n+'-')[1].split('.txt')[0])
      # row 0 is the earliest non-empty snapshot
      to_add[str(y)]=round(float(arr[0][idx])*100,2)
  return to_add

In [None]:
#input: dataframe, the current file ID, and all of the file names of that ID
#returns: appended dataframe with all ids and years with data
def create_final_dict(dataframe,n,id_file_names):
  """Return ``dataframe`` with one extra row holding the similarity scores of ID ``n``.

  Parameters:
    dataframe: the frame collected so far (not modified).
    n: the ID as a string.
    id_file_names: full paths of all snapshot files of that ID.

  Returns:
    A new DataFrame with a fresh 0..N-1 index. Years without data are NaN in the
    new row.
  """
  # DataFrame.append was removed in pandas 2.0; pd.concat is the supported way to add a row
  new_row=pd.DataFrame([id2change(n,id_file_names)])
  return pd.concat([dataframe,new_row],ignore_index=True)

In [None]:
#input: path to data, all filenames
#returns: dictionary id:filenames of that id
def get_ids_years(path,all_filenames):
  """Group file names by the ID that precedes their first '-'.

  Each value is the list of ``path + file name`` strings for that ID, in the
  order the names were given.
  """
  grouped={}
  for file_name in all_filenames:
    file_id=file_name.split('-')[0]
    grouped.setdefault(file_id,[]).append(path+file_name)
  return grouped

In [None]:
# Map every ID to the full paths of its snapshot files.
ids2files=get_ids_years(path,filesnames)

In [None]:
#populate df: collect one row of scores per ID, then add them to df with a single concat
#(growing a DataFrame one row at a time copies the whole frame on every iteration)
rows=[]
for n in ids2files:
  print(n)
  rows.append(id2change(n,ids2files[n]))
  print('&&&&&')
df=pd.concat([df,pd.DataFrame(rows)],ignore_index=True)

In [None]:
#save df to csv file (left commented out; uncomment to regenerate the CSV that the analysis section below reads)
#df.to_csv('/content/drive/MyDrive/CSE256_SP23/AUP_project/final_change_over_time.csv')

In [None]:
#helper function for debugging and manual case analysis
#input: the current file ID and all of the file names
#returns: pairwise similarity matrix
def get_matrix(n,id_file_names):
  """Return the full pairwise TF-IDF similarity matrix of one ID's non-empty snapshots.

  Parameters:
    n: the ID as a string.
    id_file_names: full paths of all snapshot files of that ID.

  Returns:
    2-D array with one row and column per non-empty snapshot, in year order.
    When every file is empty an empty (0, 0) array is returned.
  """
  import numpy as np  # local import: numpy is only imported further down the notebook
  sorted_file_order=sort_id_files(n,id_file_names)
  docs=content(sorted_file_order)
  documents=[d for d in docs if len(d)!=0] #take out empty files
  if len(documents)==0:
    # previously fell through to 'return arr' with arr never assigned
    return np.empty((0,0))
  tfidf = TfidfVectorizer().fit_transform(documents)
  # no need to normalize, since Vectorizer will return normalized tf-idf
  # '@' is the matrix product for both scipy sparse matrices and sparse arrays
  pairwise_similarity = tfidf @ tfidf.T
  return pairwise_similarity.toarray()

### After generating the dataframe: analysis and figures

In [None]:
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd
import numpy as np
import statistics

In [None]:
def content(to_analyze, encoding=None):
  """Read every file in ``to_analyze`` and return their texts in the same order.

  Parameters:
    to_analyze: iterable of file paths.
    encoding: text encoding passed to ``open``; the default ``None`` keeps the
      platform default.

  Returns:
    list of str, one entry per input path (an empty file yields '').

  Raises:
    OSError: if a file cannot be opened.
  """
  documents=[]
  for file in to_analyze:
    # the with-block closes the file on exit, so no explicit close() is needed
    with open(file, encoding=encoding) as f:
      documents.append(f.read())
  return documents
path='/content/drive/MyDrive/CSE256_SP23/AUP_project/snapshots/'

In [None]:
# Directory the case-study figures are saved into; file names are appended directly, so keep the trailing '/'.
path_final_figs='/content/drive/MyDrive/CSE256_SP23/AUP_project/figs/'

In [None]:
# Load the saved similarity table and apply a handful of manual corrections.
df=pd.read_csv('/content/drive/MyDrive/CSE256_SP23/AUP_project/final_change_over_time.csv')
df=df.drop(columns=['Unnamed: 0']) # presumably the index column written out when the CSV was saved
df['ID']=df['ID'].astype(str).apply(lambda x: x.split('.')[0]) #turn IDs from string(floats) to str(ints), e.g. '134.0' -> '134'
# The corrections below address rows by index label, NOT by file ID.
# NOTE(review): they will hit different rows if the CSV is ever regenerated in another row order.
df.at[123,'2021']=np.nan #wayback gives something strange for this one
df.at[69,'2023']=np.nan #wayback gives something diff for this one (69 the index!! not the file ID!!)
df.at[123,'2022']=np.nan #massive header when in reality it's the same
df.at[136,'2023']=34.62 #avast needed a 2023 datapoint

In [None]:
# IDs kept for the analysis; they are strings because they are matched against the string-typed df.ID column.
final_ids=['134','68','135','20','21','57','56','6','199','143','142','177','176','63','62','14','149','148','100','101','204','205','37','36','123','122','154','155','40','41','129','128','75','160','161','117','116','91','90','30','31','203','124','79','125','78','153','152','47','46','188','189','73','72','167','166','182','183','110','111','96','97','159','133','132','27','26','1','0','50','51','19','144','18','145','170','171','64','139','65','138','13','12','81','80','107','106','195','77','76','163','162','114','115','48','186','187','92','93','207','206','169','34','168','120','99','157','156','43','42','29','174','175','60','61','85','84','16','191','190','103','102','137','136','23','22','54','109','55','108','4','140','141','173','172','67','66','1011','82','83','59','104','58','196','8','9','197','130','131','24','179','25','2','3','53','52','88','89','146','70','71','164','39','165','38','181','180','113','112','95','33','200','201','127','126','150','119','44','118','45']

In [None]:
# Keep only the rows whose ID is in final_ids.
filtered_aups=df[df.ID.isin(final_ids)]

In [None]:
 #find all rows that only contain NAN, 99% and 100% -> those stay the same
# Rounded scores that count as "unchanged".
UNCHANGED_SCORES={99,100}
year_columns=['20'+str(j) for j in range(13,24)]
check=[]
for i in filtered_aups.ID.unique():
  curr=filtered_aups[filtered_aups.ID==i]
  all_vals=[]
  for col in year_columns:
    if col not in curr.columns:
      continue # a year column that is absent from the frame is skipped
    first=curr[col].iloc[0] # score of the first row for this ID
    if pd.isna(first):
      continue # no snapshot for that year
    # Missing years are now skipped explicitly instead of through a bare 'except:',
    # so any other error surfaces rather than being silently swallowed.
    all_vals.append(round(float(first))) #this will round 98.9 to 99
  # unchanged = at least one score, and every rounded score is 99 or 100
  if all_vals and set(all_vals)<=UNCHANGED_SCORES:
    check.append(i)

In [None]:
# Sanity check: the percentage of IDs flagged as unchanged, compared against a previously observed value.
# NOTE(review): 181 is hard-coded (presumably the number of IDs analysed) and exact float equality is fragile — confirm.
len(check)*100/181==63.53591160220994

In [None]:
# Rows of the IDs flagged as unchanged (taken from df; every ID in check came from filtered_aups).
stayed_the_same=df[df.ID.isin(check)]

In [None]:
from matplotlib import pyplot as plt
# Overlay one line + scatter per unchanged ID: similarity score per year.
x=['20'+str(c) for c in range(13,24)]
# convert the frame to row lists once instead of on every loop iteration
for all_vals in stayed_the_same.values.tolist():
  y=all_vals[1:] # everything after the first column (the ID, used as the label)
  plt.plot(x,y)
  plt.scatter(x,y,label=all_vals[0])
plt.ylabel('TF-IDF % Similarity compared to 2013')
plt.xlabel('Year')
plt.title('AUP Change Over Time')
plt.ylim(0,110)
#plt.legend()

In [None]:
# IDs from final_ids that were NOT flagged as unchanged, in final_ids order.
_unchanged_ids=set(check) # set membership instead of scanning the list for every ID
changed=[i for i in final_ids if i not in _unchanged_ids]
changed_df=df[df.ID.isin(changed)]
from matplotlib import pyplot as plt
x=['20'+str(c) for c in range(13,24)]
# Iterate over the frame's own rows rather than a fixed count of 64: every changed row is
# plotted and the loop cannot index past the end of the frame.
for all_vals in changed_df.values.tolist():
  y=all_vals[1:] # everything after the first column (the ID, used as the label)
  plt.plot(x,y)
  plt.scatter(x,y,label=all_vals[0])
plt.ylabel('TF-IDF % Similarity compared to 2013')
plt.xlabel('Year')
plt.title('AUP Change Over Time')
plt.ylim(0,110)
plt.legend(ncol=2, loc='lower left',bbox_to_anchor=(1,0.4))

In [None]:
# The three IDs singled out as case studies.
interesting=changed_df[changed_df.ID.isin(['201','135','104'])]

In [None]:
# Display the case-study rows.
interesting

In [None]:
# Display name for each case-study ID (used in the figure titles).
id2name={'104':'Forbin','135':'Triton Digital','201':'Avast'}

In [None]:
x=['20'+str(c) for c in range(13,24)]
# Pick the row by ID rather than by position, so the plotted data always matches the
# saved file name regardless of the row order of the frame.
all_vals=interesting[interesting.ID=='104'].values.tolist()[0]
y=all_vals[1:] # everything after the first column (the ID)
plt.figure(figsize=(7,2))
plt.plot(x,y)
plt.scatter(x,y,label='Randomly selected Wayback snapshot')
plt.ylabel('TF-IDF % Similar to 2013')
plt.xlabel('Year')
plt.title(id2name[all_vals[0]] + '\'s AUP Change Over Time')
plt.ylim(70,105)
plt.legend(ncol=1,loc='upper right')
# NOTE(review): autoscale() re-enables automatic limits, which likely overrides the ylim set above — confirm intended.
plt.autoscale()
plt.savefig(path_final_figs+'104_forbin.pdf', bbox_inches="tight")

In [None]:
# Pick the row by ID rather than by position, so the plotted data always matches the
# hard-coded title and file name regardless of the row order of the frame.
# (x, the list of year labels, comes from an earlier plotting cell.)
all_vals=interesting[interesting.ID=='135'].values.tolist()[0]
y=all_vals[1:] # everything after the first column (the ID)
#plt.figure(figsize=(7,2))
plt.plot(x,y,color='red')
plt.scatter(x,y,label='Randomly selected Wayback snapshot',color='red')
plt.ylabel('TF-IDF % Similar to 2013')
plt.xlabel('Year')
plt.title('Triton Digital' + '\'s AUP Change Over Time')
plt.ylim(70,105)
plt.legend(ncol=1,loc='upper right')
# NOTE(review): autoscale() re-enables automatic limits, which likely overrides the ylim set above — confirm intended.
plt.autoscale()
plt.savefig(path_final_figs+'135_tritondigital.pdf', bbox_inches="tight")

In [None]:
# Pick the row by ID rather than by position, so the plotted data always matches the
# title and file name regardless of the row order of the frame.
# (x, the list of year labels, comes from an earlier plotting cell.)
all_vals=interesting[interesting.ID=='201'].values.tolist()[0]
y=all_vals[1:] # everything after the first column (the ID)
plt.plot(x,y,color='green')
plt.scatter(x,y,label='Randomly selected Wayback snapshot',color='green')
plt.ylabel('TF-IDF % Similar to 2013')
plt.xlabel('Year')
plt.title(id2name[all_vals[0]] + '\'s AUP Change Over Time')
plt.ylim(30,105)
plt.legend(ncol=1,loc='upper right')
# NOTE(review): autoscale() re-enables automatic limits, which likely overrides the ylim set above — confirm intended.
plt.autoscale()
# NOTE(review): id2name maps '201' to 'Avast', yet the file is named 'smartsheet'. The name is
# kept unchanged in case something downstream references it — confirm and rename if it is a leftover.
plt.savefig(path_final_figs+'201_smartsheet.pdf', bbox_inches="tight")

## Bag-of-Words Analysis for the 3 Case Studies (ultimately omitted)

### 201 - Avast - The most change

In [None]:
# Paths of the eleven yearly snapshot files (2013-2023) for ID 201.
doc_names=[path+'201-20'+str(j)+'.txt' for j in range(13,24)]

In [None]:
y2013=['Acceptable Use Policy\nWe all know that the Internet is a wide-open place to explore, learn, and have fun.  We want to help make sure that you can do that safely, securely, and privately.  At AVAST, we like to believe that everyone has the best of intentions and clearly knows where the lines of right and wrong are drawn in the sand of the Internet.  Help us make sure that the web stays a great place to spend your time by not doing things that authorities would view as illegal or inappropriate.  So please don’t use our service to send or receive pirated movies or music, send defamatory material or do something bad like that . If we see anything that is over the line, we are obligated to turn it over to the authorities and we may have to shut off your access.  Be responsible citizens and just do the right things while using AVAST products!  If you see anything bad going on, fire off an email to']

In [None]:
y2014=['Acceptable Use Policy\nWe all know that the Internet is a wide-open place to explore, learn, and have fun. We want to help make sure that you can do that safely, securely, and privately. At AVAST, we like to believe that everyone has the best of intentions and clearly knows where the lines of right and wrong are drawn in the sand of the Internet. Help us make sure that the web stays a great place to spend your time by not doing things that authorities would view as illegal or inappropriate. So please don’t use our service to send or receive pirated movies or music, send defamatory material or do something bad like that. We cannot and do not actively monitor for this. However, if we receive a complaint about misuse (for instance, from a copyright owner), we will investigate and take action if the complaint is correct. This is standard procedure for reputable providers of VPN services.  Be responsible citizens and just do the right things while using AVAST products! If you see anything bad going on, fire off an email to']

In [None]:
y2015=['Acceptable Use Policy\nWe all know that the Internet is a wide-open place to explore, learn, and have fun. We want to help make sure that you can do that safely, securely, and privately. At Avast, we like to believe that everyone has the best of intentions and clearly knows where the lines of right and wrong are drawn in the sand of the Internet. Help us make sure that the web stays a great place to spend your time by not doing things that authorities would view as illegal or inappropriate. So please don’t use our service to send or receive pirated movies or music, send defamatory material or do something bad like that. We cannot and do not actively monitor for this. However, if we receive a complaint about misuse (for instance, from a copyright owner), we will investigate and take action if the complaint is correct. This is standard procedure for reputable providers of VPN services.  Be responsible citizens and just do the right things while using Avast products! If you see anything bad going on, fire off an email to .\nIf you have any questions or concerns regarding Acceptable Use Policy, please feel free to contact our customer support']

In [None]:
y2016=['Acceptable Use Policy\nWe all know that the Internet is a wide-open place to explore, learn, and have fun. We want to help make sure that you can do that safely, securely, and privately. At Avast, we like to believe that everyone has the best of intentions and clearly knows where the lines of right and wrong are drawn in the sand of the Internet. Help us make sure that the web stays a great place to spend your time by not doing things that authorities would view as illegal or inappropriate. So please don’t use our service to send or receive pirated movies or music, send defamatory material or do something bad like that. We cannot and do not actively monitor for this. However, if we receive a complaint about misuse (for instance, from a copyright owner), we will investigate and take action if the complaint is correct. This is standard procedure for reputable providers of VPN services.  Be responsible citizens and just do the right things while using Avast products! If you see anything bad going on, fire off an email to .\nIf you have any questions or concerns regarding Acceptable Use Policy, please feel free to contact our customer support.']

In [None]:
y2017=['Acceptable Use Policy\nWe all know that the Internet is a wide-open place to explore, learn, and have fun. We want to help make sure that you can do that safely, securely, and privately. At Avast, we like to believe that everyone has the best of intentions and clearly knows where the lines of right and wrong are drawn in the sand of the Internet. Help us make sure that the web stays a great place to spend your time by not doing things that authorities would view as illegal or inappropriate. So please don’t use our service to send or receive pirated movies or music, send defamatory material or do something bad like that. We cannot and do not actively monitor for this. However, if we receive a complaint about misuse (for instance, from a copyright owner), we will investigate and take action if the complaint is correct. This is standard procedure for reputable providers of VPN services.  Be responsible citizens and just do the right things while using Avast products! If you see anything bad going on, fire off an email to .\nIf you have any questions or concerns regarding Acceptable Use Policy, please feel free to contact our customer support.']

In [None]:
y2018=['Acceptable use policy\nWe all know that the Internet is a wide-open place to explore, learn, and have fun. We want to help make sure that you can do that safely, securely, and privately. At Avast, we like to believe that everyone has the best of intentions and clearly knows where the lines of right and wrong are drawn in the sand of the Internet. Help us make sure that the web stays a great place to spend your time by not doing things that authorities would view as illegal or inappropriate. So please don’t use our service to send or receive pirated movies or music, send defamatory material or do something bad like that. We cannot and do not actively monitor for this. However, if we receive a complaint about misuse (for instance, from a copyright owner), we will investigate and take action if the complaint is correct. This is standard procedure for reputable providers of VPN services.  Be responsible citizens and just do the right things while using Avast products! If you see anything bad going on, fire off an email to .\nIf you have any questions or concerns regarding Acceptable Use Policy, please feel free to contact our customer support.']

In [None]:
y2019=["This Acceptable Use Policy (this “Policy”) describes prohibited uses of software or services, including any updates (each, a “Solution”) offered by Avast Software, s.r.o. or its affiliates identified here (“Vendor”), and any user manuals and instructions provided with the Solution (“Documentation”). The examples described in this Policy are not exhaustive. We may modify this Policy at any time by posting a revised version on this website. If you violate the Policy or authorize or help others to do so, we may immediately suspend or terminate your use of any or all of our Solutions. Capitalized terms not otherwise defined by this Policy have the meanings given by the End User License Agreement (the “Agreement”) for the Solutions available here.\n1.\tYou may not copy or use any Solution or Documentation except as authorized by the Agreement.\n2.\tYou may not, and may not permit any third party to:\n2.1.\tuse any authorization code, license number, username/password combination or other activation code or number supplied by Vendor in connection with any Solution (“Activation Code”) on, or for, more than the number of Devices specified by the Applicable Conditions;\n2.2.\tdisclose any Activation Code to any party other than Vendor or Vendor’s designated representatives;\n2.3.\texcept as expressly authorized by law: (i) reverse engineer, disassemble, decompile, translate, reconstruct, transform or extract any Solution or any portion of the Solution (including any related malware signatures and malware detection routines); or (ii) change, modify or otherwise alter any Solution (including any related malware signatures and malware detection routines);\n2.4.\texcept as authorized by a distribution agreement, reseller agreement or other agreement between you and Vendor or other member of Vendor Group, publish, resell, distribute, broadcast, transmit, communicate, transfer, pledge, rent, share or sublicense any Solution;\n2.5.\texcept as expressly authorized by the 
Agreement, the Applicable Conditions or another agreement between you and Vendor or other member of Vendor Group, use any Solution to manage the facilities of a third party or grant any third party access to or use of any Solution on a service bureau, timesharing, subscription service or application service provider or other similar basis;\n2.6.\tuse any Solution to provide or build a product or service that competes with the Solution;\n2.7.\tuse or attempt to use any Solution to: (i) violate this Policy; (ii) upload, download, stream, transmit, copy or store any information, data, or materials, or engage or assist in any activity, that may: (A) infringe the intellectual property rights or other rights of any third party; (B) contain any unlawful, harmful, threatening, abusive, defamatory or otherwise objectionable material of any kind; (C) harm or attempt to harm others; (D) have the potential to incite or produce conduct that is unlawful, harmful, threatening, abusive, harassing, tortious, defamatory, libelous, vulgar, obscene, invasive of another's privacy, hateful, or racially, ethnically, religiously or sexually discriminatory or otherwise objectionable; (E) promote or provide instructional information about illegal activities, promote physical harm or injury against any group or individual, or promote any act of cruelty to animals; (F) impersonate any person or entity or otherwise misrepresent your affiliation with a person or entity; or (G) assist any fraud, deception or theft; (H) damage, disable or impair the operation of, or gain or attempt to gain unauthorized access, receipt, use, copying, alteration or destruction of or to, any property, Devices, software, services, networks or data by any means, including by hacking, phishing, spoofing or seeking to circumvent or defeat any firewalls, password protection or other information security protections or controls of whatever nature; (iii) in any way violate any applicable local, national or international 
law or regulation; (iv) forge headers or otherwise manipulate identifiers in order to disguise the origin of any content transmitted through the use of the Solution; (v) upload, post, email or otherwise transmit any unsolicited or unauthorized advertising, promotional materials, “junk mail,” “spam,” “chain letters,” or “pyramid schemes”; or (vi) collect or store personal data without the knowledge and express consent of the data subject;\n2.8.\tdamage, disable or impair the operation of, or gain or attempt to gain unauthorized access to, any Solution or to any property, Devices, software, services, networks or data connected to, or inter-operating with, such Solution, or to any content or data stored, accessed or delivered through such Solution, by any means, including by hacking, phishing, spoofing or seeking to circumvent or defeat any firewalls, password protection or other information security protections or controls of whatever nature;\n2.9.\ttest or benchmark, or disclose or publish testing or benchmark results, for any Solution without Vendor’s prior written consent; or\n2.10.\tdefeat or circumvent, attempt to defeat or circumvent, or authorize or assist any third party in defeating or circumventing controls on the use of copies of any Solution.\n3.\tConflicts. The Agreement may include similar or additional restrictions on your use of a Solution. If and to the extent this Policy and the Agreement conflict, the more restrictive provisions will govern."]

In [None]:
y2020=["This Acceptable Use Policy (this “Policy”) describes prohibited uses of software or services, including any updates (each, a “Solution”) offered by Avast Software, s.r.o. or its affiliates identified here (“Vendor”), and any user manuals and instructions provided with the Solution (“Documentation”). The examples described in this Policy are not exhaustive. We may modify this Policy at any time by posting a revised version on this website. If you violate the Policy or authorize or help others to do so, we may immediately suspend or terminate your use of any or all of our Solutions. Capitalized terms not otherwise defined by this Policy have the meanings given by the End User License Agreement (the “Agreement”) for the Solutions available here.\n1.\tYou may not copy or use any Solution or Documentation except as authorized by the Agreement.\n2.\tYou may not, and may not permit any third party to:\n2.1.\tuse any authorization code, license number, username/password combination or other activation code or number supplied by Vendor in connection with any Solution (“Activation Code”) on, or for, more than the number of Devices specified by the Applicable Conditions;\n2.2.\tdisclose any Activation Code to any party other than Vendor or Vendor’s designated representatives;\n2.3.\texcept as expressly authorized by law: (i) reverse engineer, disassemble, decompile, translate, reconstruct, transform or extract any Solution or any portion of the Solution (including any related malware signatures and malware detection routines); or (ii) change, modify or otherwise alter any Solution (including any related malware signatures and malware detection routines);\n2.4.\texcept as authorized by a distribution agreement, reseller agreement or other agreement between you and Vendor or other member of Vendor Group, publish, resell, distribute, broadcast, transmit, communicate, transfer, pledge, rent, share or sublicense any Solution;\n2.5.\texcept as expressly authorized by the 
Agreement, the Applicable Conditions or another agreement between you and Vendor or other member of Vendor Group, use any Solution to manage the facilities of a third party or grant any third party access to or use of any Solution on a service bureau, timesharing, subscription service or application service provider or other similar basis;\n2.6.\tuse any Solution to provide or build a product or service that competes with the Solution;\n2.7.\tuse or attempt to use any Solution to: (i) violate this Policy; (ii) upload, download, stream, transmit, copy or store any information, data, or materials, or engage or assist in any activity, that may: (A) infringe the intellectual property rights or other rights of any third party; (B) contain any unlawful, harmful, threatening, abusive, defamatory or otherwise objectionable material of any kind; (C) harm or attempt to harm others; (D) have the potential to incite or produce conduct that is unlawful, harmful, threatening, abusive, harassing, tortious, defamatory, libelous, vulgar, obscene, invasive of another\'s privacy, hateful, or racially, ethnically, religiously or sexually discriminatory or otherwise objectionable; (E) promote or provide instructional information about illegal activities, promote physical harm or injury against any group or individual, or promote any act of cruelty to animals; (F) impersonate any person or entity or otherwise misrepresent your affiliation with a person or entity; or (G) assist any fraud, deception or theft; (H) damage, disable or impair the operation of, or gain or attempt to gain unauthorized access, receipt, use, copying, alteration or destruction of or to, any property, Devices, software, services, networks or data by any means, including by hacking, phishing, spoofing or seeking to circumvent or defeat any firewalls, password protection or other information security protections or controls of whatever nature; (iii) in any way violate any applicable local, national or international 
law or regulation; (iv) forge headers or otherwise manipulate identifiers in order to disguise the origin of any content transmitted through the use of the Solution; (v) upload, post, email or otherwise transmit any unsolicited or unauthorized advertising, promotional materials, “junk mail,” “spam,” “chain letters,” or “pyramid schemes”; or (vi) collect or store personal data without the knowledge and express consent of the data subject;\n2.8.\tdamage, disable or impair the operation of, or gain or attempt to gain unauthorized access to, any Solution or to any property, Devices, software, services, networks or data connected to, or inter-operating with, such Solution, or to any content or data stored, accessed or delivered through such Solution, by any means, including by hacking, phishing, spoofing or seeking to circumvent or defeat any firewalls, password protection or other information security protections or controls of whatever nature;\n2.9.\ttest or benchmark, or disclose or publish testing or benchmark results, for any Solution without Vendor’s prior written consent; or\n2.10.\tdefeat or circumvent, attempt to defeat or circumvent, or authorize or assist any third party in defeating or circumventing controls on the use of copies of any Solution.\n3.\tConflicts. The Agreement may include similar or additional restrictions on your use of a Solution. If and to the extent this Policy and the Agreement conflict, the more restrictive provisions will govern."]

In [None]:
y2021=["This Acceptable Use Policy (this “Policy”) describes prohibited uses of software or services, including any updates (each, a “Solution”) offered by Avast Software, s.r.o. or its affiliates identified here (“Vendor”), and any user manuals and instructions provided with the Solution (“Documentation”). The examples described in this Policy are not exhaustive. We may modify this Policy at any time by posting a revised version on this website. If you violate the Policy or authorize or help others to do so, we may immediately suspend or terminate your use of any or all of our Solutions. Capitalized terms not otherwise defined by this Policy have the meanings given by the End User License Agreement (the “Agreement”) for the Solutions available here.\n1.\tYou may not copy or use any Solution or Documentation except as authorized by the Agreement.\n2.\tYou may not, and may not permit any third party to:\n2.1.\tuse any authorization code, license number, username/password combination or other activation code or number supplied by Vendor in connection with any Solution (“Activation Code”) on, or for, more than the number of Devices specified by the Applicable Conditions;\n2.2.\tdisclose any Activation Code to any party other than Vendor or Vendor’s designated representatives;\n2.3.\texcept as expressly authorized by law: (i) reverse engineer, disassemble, decompile, translate, reconstruct, transform or extract any Solution or any portion of the Solution (including any related malware signatures and malware detection routines); or (ii) change, modify or otherwise alter any Solution (including any related malware signatures and malware detection routines);\n2.4.\texcept as authorized by a distribution agreement, reseller agreement or other agreement between you and Vendor or other member of Vendor Group, publish, resell, distribute, broadcast, transmit, communicate, transfer, pledge, rent, share or sublicense any Solution;\n2.5.\texcept as expressly authorized by the 
Agreement, the Applicable Conditions or another agreement between you and Vendor or other member of Vendor Group, use any Solution to manage the facilities of a third party or grant any third party access to or use of any Solution on a service bureau, timesharing, subscription service or application service provider or other similar basis;\n2.6.\tuse any Solution to provide or build a product or service that competes with the Solution;\n2.7.\tuse or attempt to use any Solution to: (i) violate this Policy; (ii) upload, download, stream, transmit, copy or store any information, data, or materials, or engage or assist in any activity, that may: (A) infringe the intellectual property rights or other rights of any third party; (B) contain any unlawful, harmful, threatening, abusive, defamatory or otherwise objectionable material of any kind; (C) harm or attempt to harm others; (D) have the potential to incite or produce conduct that is unlawful, harmful, threatening, abusive, harassing, tortious, defamatory, libelous, vulgar, obscene, invasive of another\'s privacy, hateful, or racially, ethnically, religiously or sexually discriminatory or otherwise objectionable; (E) promote or provide instructional information about illegal activities, promote physical harm or injury against any group or individual, or promote any act of cruelty to animals; (F) impersonate any person or entity or otherwise misrepresent your affiliation with a person or entity; or (G) assist any fraud, deception or theft; (H) damage, disable or impair the operation of, or gain or attempt to gain unauthorized access, receipt, use, copying, alteration or destruction of or to, any property, Devices, software, services, networks or data by any means, including by hacking, phishing, spoofing or seeking to circumvent or defeat any firewalls, password protection or other information security protections or controls of whatever nature; (iii) in any way violate any applicable local, national or international 
law or regulation; (iv) forge headers or otherwise manipulate identifiers in order to disguise the origin of any content transmitted through the use of the Solution; (v) upload, post, email or otherwise transmit any unsolicited or unauthorized advertising, promotional materials, “junk mail,” “spam,” “chain letters,” or “pyramid schemes”; or (vi) collect or store personal data without the knowledge and express consent of the data subject;\n2.8.\tdamage, disable or impair the operation of, or gain or attempt to gain unauthorized access to, any Solution or to any property, Devices, software, services, networks or data connected to, or inter-operating with, such Solution, or to any content or data stored, accessed or delivered through such Solution, by any means, including by hacking, phishing, spoofing or seeking to circumvent or defeat any firewalls, password protection or other information security protections or controls of whatever nature;\n2.9.\ttest or benchmark, or disclose or publish testing or benchmark results, for any Solution without Vendor’s prior written consent; or\n2.10.\tdefeat or circumvent, attempt to defeat or circumvent, or authorize or assist any third party in defeating or circumventing controls on the use of copies of any Solution.\n3.\tConflicts. The Agreement may include similar or additional restrictions on your use of a Solution. If and to the extent this Policy and the Agreement conflict, the more restrictive provisions will govern."]

In [None]:
# Avast Acceptable Use Policy text bound to y2022 (presumably the 2022 snapshot, going by the name - confirm).
# Stored as a one-element list, so the document string itself is y2022[0].
y2022=["This Acceptable Use Policy (this “Policy”) describes prohibited uses of software or services, including any updates (each, a “Solution”) offered by Avast Software, s.r.o. or its affiliates identified here (“Vendor”), and any user manuals and instructions provided with the Solution (“Documentation”). The examples described in this Policy are not exhaustive. We may modify this Policy at any time by posting a revised version on this website. If you violate the Policy or authorize or help others to do so, we may immediately suspend or terminate your use of any or all of our Solutions. Capitalized terms not otherwise defined by this Policy have the meanings given by the End User License Agreement (the “Agreement”) for the Solutions available here.\n1.\tYou may not copy or use any Solution or Documentation except as authorized by the Agreement.\n2.\tYou may not, and may not permit any third party to:\n2.1.\tuse any authorization code, license number, username/password combination or other activation code or number supplied by Vendor in connection with any Solution (“Activation Code”) on, or for, more than the number of Devices specified by the Applicable Conditions;\n2.2.\tdisclose any Activation Code to any party other than Vendor or Vendor’s designated representatives;\n2.3.\texcept as expressly authorized by law: (i) reverse engineer, disassemble, decompile, translate, reconstruct, transform or extract any Solution or any portion of the Solution (including any related malware signatures and malware detection routines); or (ii) change, modify or otherwise alter any Solution (including any related malware signatures and malware detection routines);\n2.4.\texcept as authorized by a distribution agreement, reseller agreement or other agreement between you and Vendor or other member of Vendor Group, publish, resell, distribute, broadcast, transmit, communicate, transfer, pledge, rent, share or sublicense any Solution;\n2.5.\texcept as expressly authorized by the 
Agreement, the Applicable Conditions or another agreement between you and Vendor or other member of Vendor Group, use any Solution to manage the facilities of a third party or grant any third party access to or use of any Solution on a service bureau, timesharing, subscription service or application service provider or other similar basis;\n2.6.\tuse any Solution to provide or build a product or service that competes with the Solution;\n2.7.\tuse or attempt to use any Solution to: (i) violate this Policy; (ii) upload, download, stream, transmit, copy or store any information, data, or materials, or engage or assist in any activity, that may: (A) infringe the intellectual property rights or other rights of any third party; (B) contain any unlawful, harmful, threatening, abusive, defamatory or otherwise objectionable material of any kind; (C) harm or attempt to harm others; (D) have the potential to incite or produce conduct that is unlawful, harmful, threatening, abusive, harassing, tortious, defamatory, libelous, vulgar, obscene, invasive of another's privacy, hateful, or racially, ethnically, religiously or sexually discriminatory or otherwise objectionable; (E) promote or provide instructional information about illegal activities, promote physical harm or injury against any group or individual, or promote any act of cruelty to animals; (F) impersonate any person or entity or otherwise misrepresent your affiliation with a person or entity; or (G) assist any fraud, deception or theft; (H) damage, disable or impair the operation of, or gain or attempt to gain unauthorized access, receipt, use, copying, alteration or destruction of or to, any property, Devices, software, services, networks or data by any means, including by hacking, phishing, spoofing or seeking to circumvent or defeat any firewalls, password protection or other information security protections or controls of whatever nature; (iii) in any way violate any applicable local, national or international 
law or regulation; (iv) forge headers or otherwise manipulate identifiers in order to disguise the origin of any content transmitted through the use of the Solution; (v) upload, post, email or otherwise transmit any unsolicited or unauthorized advertising, promotional materials, “junk mail,” “spam,” “chain letters,” or “pyramid schemes”; or (vi) collect or store personal data without the knowledge and express consent of the data subject;\n2.8.\tdamage, disable or impair the operation of, or gain or attempt to gain unauthorized access to, any Solution or to any property, Devices, software, services, networks or data connected to, or inter-operating with, such Solution, or to any content or data stored, accessed or delivered through such Solution, by any means, including by hacking, phishing, spoofing or seeking to circumvent or defeat any firewalls, password protection or other information security protections or controls of whatever nature;\n2.9.\ttest or benchmark, or disclose or publish testing or benchmark results, for any Solution without Vendor’s prior written consent; or\n2.10.\tdefeat or circumvent, attempt to defeat or circumvent, or authorize or assist any third party in defeating or circumventing controls on the use of copies of any Solution.\n3.\tConflicts. The Agreement may include similar or additional restrictions on your use of a Solution. If and to the extent this Policy and the Agreement conflict, the more restrictive provisions will govern."]

In [None]:
# Avast Acceptable Use Policy text bound to y2023 (presumably the 2023 snapshot, going by the name - confirm).
# Stored as a one-element list, so the document string itself is y2023[0].
y2023=["This Acceptable Use Policy (this “Policy”) describes prohibited uses of software or services, including any updates (each, a “Solution”) offered by Avast Software, s.r.o. or its affiliates identified here (“Vendor”), and any user manuals and instructions provided with the Solution (“Documentation”). The examples described in this Policy are not exhaustive. We may modify this Policy at any time by posting a revised version on this website. If you violate the Policy or authorize or help others to do so, we may immediately suspend or terminate your use of any or all of our Solutions. Capitalized terms not otherwise defined by this Policy have the meanings given by the End User License Agreement (the “Agreement”) for the Solutions available here.\n1.\tYou may not copy or use any Solution or Documentation except as authorized by the Agreement.\n2.\tYou may not, and may not permit any third party to:\n2.1.\tuse any authorization code, license number, username/password combination or other activation code or number supplied by Vendor in connection with any Solution (“Activation Code”) on, or for, more than the number of Devices specified by the Applicable Conditions;\n2.2.\tdisclose any Activation Code to any party other than Vendor or Vendor’s designated representatives;\n2.3.\texcept as expressly authorized by law: (i) reverse engineer, disassemble, decompile, translate, reconstruct, transform or extract any Solution or any portion of the Solution (including any related malware signatures and malware detection routines); or (ii) change, modify or otherwise alter any Solution (including any related malware signatures and malware detection routines);\n2.4.\texcept as authorized by a distribution agreement, reseller agreement or other agreement between you and Vendor or other member of Vendor Group, publish, resell, distribute, broadcast, transmit, communicate, transfer, pledge, rent, share or sublicense any Solution;\n2.5.\texcept as expressly authorized by the 
Agreement, the Applicable Conditions or another agreement between you and Vendor or other member of Vendor Group, use any Solution to manage the facilities of a third party or grant any third party access to or use of any Solution on a service bureau, timesharing, subscription service or application service provider or other similar basis;\n2.6.\tuse any Solution to provide or build a product or service that competes with the Solution;\n2.7.\tuse or attempt to use any Solution to: (i) violate this Policy; (ii) upload, download, stream, transmit, copy or store any information, data, or materials, or engage or assist in any activity, that may: (A) infringe the intellectual property rights or other rights of any third party; (B) contain any unlawful, harmful, threatening, abusive, defamatory or otherwise objectionable material of any kind; (C) harm or attempt to harm others; (D) have the potential to incite or produce conduct that is unlawful, harmful, threatening, abusive, harassing, tortious, defamatory, libelous, vulgar, obscene, invasive of another's privacy, hateful, or racially, ethnically, religiously or sexually discriminatory or otherwise objectionable; (E) promote or provide instructional information about illegal activities, promote physical harm or injury against any group or individual, or promote any act of cruelty to animals; (F) impersonate any person or entity or otherwise misrepresent your affiliation with a person or entity; or (G) assist any fraud, deception or theft; (H) damage, disable or impair the operation of, or gain or attempt to gain unauthorized access, receipt, use, copying, alteration or destruction of or to, any property, Devices, software, services, networks or data by any means, including by hacking, phishing, spoofing or seeking to circumvent or defeat any firewalls, password protection or other information security protections or controls of whatever nature; (iii) in any way violate any applicable local, national or international 
law or regulation; (iv) forge headers or otherwise manipulate identifiers in order to disguise the origin of any content transmitted through the use of the Solution; (v) upload, post, email or otherwise transmit any unsolicited or unauthorized advertising, promotional materials, “junk mail,” “spam,” “chain letters,” or “pyramid schemes”; or (vi) collect or store personal data without the knowledge and express consent of the data subject;\n2.8.\tdamage, disable or impair the operation of, or gain or attempt to gain unauthorized access to, any Solution or to any property, Devices, software, services, networks or data connected to, or inter-operating with, such Solution, or to any content or data stored, accessed or delivered through such Solution, by any means, including by hacking, phishing, spoofing or seeking to circumvent or defeat any firewalls, password protection or other information security protections or controls of whatever nature;\n2.9.\ttest or benchmark, or disclose or publish testing or benchmark results, for any Solution without Vendor’s prior written consent; or\n2.10.\tdefeat or circumvent, attempt to defeat or circumvent, or authorize or assist any third party in defeating or circumventing controls on the use of copies of any Solution.\n3.\tConflicts. The Agreement may include similar or additional restrictions on your use of a Solution. If and to the extent this Policy and the Agreement conflict, the more restrictive provisions will govern."]

In [None]:
# Each yNNNN variable is a one-element list; take element 0 (the document string)
# and group the documents into two ordered lists: 2013-2018 and 2019-2023.
doc_contents_pre = [snapshot[0] for snapshot in (y2013, y2014, y2015, y2016, y2017, y2018)]
doc_contents_post = [snapshot[0] for snapshot in (y2019, y2020, y2021, y2022, y2023)]

In [None]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words model over the documents collected in doc_contents_pre.
docs = np.array(doc_contents_pre)
vectorizer = CountVectorizer()
# fit_transform learns the vocabulary from docs and returns their token-count matrix.
bag = vectorizer.fit_transform(docs)

# Printed in order, one per line group:
#   1. the unique words / tokens found across all documents (the features)
#   2. the mapping from each unique word to its column index
#   3. the numerical feature vectors as a dense array
print(
    vectorizer.get_feature_names_out(),
    vectorizer.vocabulary_,
    bag.toarray(),
    sep='\n',
)

In [None]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words model over the documents collected in doc_contents_post.
# Note: this rebinds docs, vectorizer and bag from any earlier cell that set them.
docs = np.array(doc_contents_post)
vectorizer = CountVectorizer()
# fit_transform learns the vocabulary from docs and returns their token-count matrix.
bag = vectorizer.fit_transform(docs)

# Printed in order, one per line group:
#   1. the unique words / tokens found across all documents (the features)
#   2. the mapping from each unique word to its column index
#   3. the numerical feature vectors as a dense array
print(
    vectorizer.get_feature_names_out(),
    vectorizer.vocabulary_,
    bag.toarray(),
    sep='\n',
)

In [None]:
# Inspect y2015: as the bare last expression of the cell, the list is shown as the cell output.
y2015

In [None]:
# Inspect y2023: as the bare last expression of the cell, the list is shown as the cell output.
y2023

In [None]:
import difflib

# Word-level comparison of the y2015 and y2016 texts.
# Testing every word of one text against every word of the other prints almost
# the entire cross product of the two word lists, whether or not the texts
# actually differ. Aligning the two word sequences first means only the spans
# that were replaced, removed, or added are printed.
words_2015 = y2015[0].split(' ')
words_2016 = y2016[0].split(' ')
# autojunk=False: keep the matcher from discarding frequently repeated words on long inputs.
matcher = difflib.SequenceMatcher(a=words_2015, b=words_2016, autojunk=False)
for tag, old_start, old_end, new_start, new_end in matcher.get_opcodes():
  if tag != 'equal':
    # Two lines per difference: the y2015 side first, then the y2016 side.
    # A pure insertion or deletion leaves the other side as an empty line.
    print(' '.join(words_2015[old_start:old_end]))
    print(' '.join(words_2016[new_start:new_end]))