### DATA CLEANING AND TRANSFORMING

In [13]:
import pandas as pd

In [14]:
# Load the scraped articles; the path is relative to the notebook's working directory.
df = pd.read_csv('scrapedData.csv')

In [15]:
df

Unnamed: 0,url_id,article_title,article_text
0,37,,“If anything kills over 10 million people in t...
1,39,What Jobs Will Robots Take From Humans in The ...,AI is rapidly evolving in the employment secto...
2,42,Will machine replace the human in the future o...,Where is this disruptive technology taking us?...
3,38,What if the Creation is Taking Over the Creator?,"Human minds, a fascination in itself carrying ..."
4,41,Will AI Replace Us or Work With Us?,“Machine intelligence is the last invention th...
...,...,...,...
106,146,Blockchain for Payments,between having a tight budget and being seriou...
107,149,Business Analytics In The Healthcare Industry,Analytics is a statistical scientific process ...
108,141,Impact of COVID-19 (Coronavirus) on the Indian...,The\ncorona outbreak has hit us hard. With the...
109,148,Big Data Analytics in Healthcare,Quality and affordable healthcare is a vision ...


In [17]:
# Order the scraped rows by article id (ascending) and preview the result.
df = df.sort_values(by='url_id')
df

Unnamed: 0,url_id,article_title,article_text
0,37,,“If anything kills over 10 million people in t...
3,38,What if the Creation is Taking Over the Creator?,"Human minds, a fascination in itself carrying ..."
1,39,What Jobs Will Robots Take From Humans in The ...,AI is rapidly evolving in the employment secto...
5,40,Will Machine Replace The Human in the Future o...,“Anything that could give rise to smarter-than...
4,41,Will AI Replace Us or Work With Us?,“Machine intelligence is the last invention th...
...,...,...,...
106,146,Blockchain for Payments,between having a tight budget and being seriou...
103,147,The future of Investing,An investment is a resource or thing procured ...
109,148,Big Data Analytics in Healthcare,Quality and affordable healthcare is a vision ...
107,149,Business Analytics In The Healthcare Industry,Analytics is a statistical scientific process ...


In [20]:
# Rename the scraped columns to the upper-case names used by the output template,
# so that both frames share the 'URL_ID' key used for the merge.

df.columns = ['URL_ID', 'ARTICLE_TITLE', 'ARTICLE_TEXT']

In [11]:
# Load the output template (URL_ID, URL and the still-empty metric columns).
output = pd.read_excel('../Output Data Structure.xlsx')

In [12]:
output

Unnamed: 0,URL_ID,URL,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX,AVG NUMBER OF WORDS PER SENTENCE,COMPLEX WORD COUNT,WORD COUNT,SYLLABLE PER WORD,PERSONAL PRONOUNS,AVG WORD LENGTH
0,37,https://insights.blackcoffer.com/ai-in-healthc...,,,,,,,,,,,,,
1,38,https://insights.blackcoffer.com/what-if-the-c...,,,,,,,,,,,,,
2,39,https://insights.blackcoffer.com/what-jobs-wil...,,,,,,,,,,,,,
3,40,https://insights.blackcoffer.com/will-machine-...,,,,,,,,,,,,,
4,41,https://insights.blackcoffer.com/will-ai-repla...,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,146,https://insights.blackcoffer.com/blockchain-fo...,,,,,,,,,,,,,
110,147,https://insights.blackcoffer.com/the-future-of...,,,,,,,,,,,,,
111,148,https://insights.blackcoffer.com/big-data-anal...,,,,,,,,,,,,,
112,149,https://insights.blackcoffer.com/business-anal...,,,,,,,,,,,,,


In [22]:
# Right-join the scraped articles onto the output template on URL_ID, so every
# template row is kept. Three URLs returned a 404 while scraping; they have no
# row in the scraped data and end up with missing ARTICLE_TITLE / ARTICLE_TEXT.

final = df.merge(output, how='right', on='URL_ID')

In [175]:
# Rows whose article text is missing because the page returned a 404 when scraped

final[final['ARTICLE_TEXT'].isna()]

Unnamed: 0,URL_ID,ARTICLE_TITLE,ARTICLE_TEXT,URL,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX,AVG NUMBER OF WORDS PER SENTENCE,COMPLEX WORD COUNT,WORD COUNT,SYLLABLE PER WORD,PERSONAL PRONOUNS,AVG WORD LENGTH,processed_data
7,44,,,https://insights.blackcoffer.com/how-neural-ne...,,,,,,,,,,,,,,
20,57,,,https://insights.blackcoffer.com/covid-19-envi...,,,,,,,,,,,,,,
107,144,,,https://insights.blackcoffer.com/ensuring-grow...,,,,,,,,,,,,,,


### NLP - Preprocessing

In [26]:
# !pip install nltk

Collecting nltk
  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
                                              0.0/1.5 MB ? eta -:--:--
     ------------                             0.5/1.5 MB 10.0 MB/s eta 0:00:01
     --------------------------               1.0/1.5 MB 10.6 MB/s eta 0:00:01
     ---------------------------------------  1.5/1.5 MB 12.0 MB/s eta 0:00:01
     ---------------------------------------- 1.5/1.5 MB 9.6 MB/s eta 0:00:00
Collecting click (from nltk)
  Using cached click-8.1.3-py3-none-any.whl (96 kB)
Collecting joblib (from nltk)
  Using cached joblib-1.2.0-py3-none-any.whl (297 kB)
Collecting regex>=2021.8.3 (from nltk)
  Downloading regex-2023.6.3-cp310-cp310-win_amd64.whl (268 kB)
                                              0.0/268.0 kB ? eta -:--:--
     -------------------------------------- 268.0/268.0 kB 8.1 MB/s eta 0:00:00
Collecting tqdm (from nltk)
  Downloading tqdm-4.65.0-py3-none-any.whl (77 kB)
                                              0

In [28]:
import nltk

In [29]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ngunj/nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


True

In [193]:
# importing stopwords

from nltk.corpus import stopwords

# import regular expression to remove the special characters

import re

# for tokenization
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize

### Extracting custom stopwords from the folder given

In [104]:
# import the list of stopwords

import os

custom_stopwords = []

stopwords_folder = '../StopWords'

# sorted() fixes the load order, which os.listdir() does not guarantee
for stopwords_file in sorted(os.listdir(stopwords_folder)):
    filepath = os.path.join(stopwords_folder, stopwords_file)
    if os.path.isfile(filepath):                               # skip sub-directories; only regular files are read
        # Decode explicitly instead of relying on the platform's default text
        # encoding: the lists contain accented entries (e.g. 'CÓRDOBA'), so a
        # plain open(filepath, 'r') gives different results - or raises
        # UnicodeDecodeError - depending on the machine's locale.
        with open(filepath, 'rb') as file:
            raw_bytes = file.read()
        try:
            text = raw_bytes.decode('utf-8-sig')
        except UnicodeDecodeError:
            text = raw_bytes.decode('cp1252')                  # fall back to the legacy Windows code page
        words = text.split()
        custom_stopwords.extend(words)

custom_stopwords

['ERNST',
 'YOUNG',
 'DELOITTE',
 'TOUCHE',
 'KPMG',
 'PRICEWATERHOUSECOOPERS',
 'PRICEWATERHOUSE',
 'COOPERS',
 'AFGHANI',
 '|',
 'Afghanistan',
 'ARIARY',
 '|',
 'Madagascar',
 'BAHT',
 '|',
 'Thailand',
 'BALBOA',
 '|',
 'Panama',
 'BIRR',
 '|',
 'Ethiopia',
 'BOLIVAR',
 '|',
 'Venezuela',
 'BOLIVIANO',
 '|',
 'Bolivia',
 'CEDI',
 '|',
 'Ghana',
 'COLON',
 '|',
 'Costa',
 'Rica',
 'CÓRDOBA',
 '|',
 'Nicaragua',
 'DALASI',
 '|',
 'Gambia',
 'DENAR',
 '|',
 'Macedonia',
 '(Former',
 'Yug.',
 'Rep.)',
 'DINAR',
 '|',
 'Algeria',
 'DIRHAM',
 '|',
 'Morocco',
 'DOBRA',
 '|',
 'São',
 'Tom',
 'and',
 'Príncipe',
 'DONG',
 '|',
 'Vietnam',
 'DRAM',
 '|',
 'Armenia',
 'ESCUDO',
 '|',
 'Cape',
 'Verde',
 'EURO',
 '|',
 'Belgium',
 'FLORIN',
 '|',
 'Aruba',
 'FORINT',
 '|',
 'Hungary',
 'GOURDE',
 '|',
 'Haiti',
 'GUARANI',
 '|',
 'Paraguay',
 'GULDEN',
 '|',
 'Netherlands',
 'Antilles',
 'HRYVNIA',
 '|',
 'Ukraine',
 'KINA',
 '|',
 'Papua',
 'New',
 'Guinea',
 'KIP',
 '|',
 'Laos',
 'KONVERT

In [114]:
# Drop the '|' separator tokens and lower-case the remaining entries in one pass
custom_stopwords = [entry.lower() for entry in custom_stopwords if entry != '|']
custom_stopwords

['ernst',
 'young',
 'deloitte',
 'touche',
 'kpmg',
 'pricewaterhousecoopers',
 'pricewaterhouse',
 'coopers',
 'afghani',
 'afghanistan',
 'ariary',
 'madagascar',
 'baht',
 'thailand',
 'balboa',
 'panama',
 'birr',
 'ethiopia',
 'bolivar',
 'venezuela',
 'boliviano',
 'bolivia',
 'cedi',
 'ghana',
 'colon',
 'costa',
 'rica',
 'córdoba',
 'nicaragua',
 'dalasi',
 'gambia',
 'denar',
 'macedonia',
 '(former',
 'yug.',
 'rep.)',
 'dinar',
 'algeria',
 'dirham',
 'morocco',
 'dobra',
 'são',
 'tom',
 'and',
 'príncipe',
 'dong',
 'vietnam',
 'dram',
 'armenia',
 'escudo',
 'cape',
 'verde',
 'euro',
 'belgium',
 'florin',
 'aruba',
 'forint',
 'hungary',
 'gourde',
 'haiti',
 'guarani',
 'paraguay',
 'gulden',
 'netherlands',
 'antilles',
 'hryvnia',
 'ukraine',
 'kina',
 'papua',
 'new',
 'guinea',
 'kip',
 'laos',
 'konvertibilna',
 'marka',
 'bosnia-herzegovina',
 'koruna',
 'czech',
 'republic',
 'krona',
 'sweden',
 'krone',
 'denmark',
 'kroon',
 'estonia',
 'kuna',
 'croatia',


### Steps for preprocessing the data:
Remove the special characters <br>
Convert the text to lower case <br>
Tokenize the data <br>
Remove stopwords <br>

In [140]:
def remove_special_characters(original_text):
    """Replace every character that is not an ASCII letter, digit or whitespace with a space.

    Newlines and non-breaking spaces (U+00A0) are then also turned into plain
    spaces. Characters are replaced one-for-one, so runs of spaces are not
    collapsed.

    Returns the cleaned string, or None when ``original_text`` is null (None/NaN).
    """
    if pd.isnull(original_text):
        return None
    # Anything outside letters, digits and whitespace becomes a single space
    without_symbols = re.sub(r'[^a-zA-Z0-9\s]', ' ', original_text)
    # \s also matches line breaks and non-breaking spaces, so those survive the
    # substitution above and are flattened separately here
    return without_symbols.replace('\n', ' ').replace('\xa0', ' ')

def convert_lowercase(original_text):
    """Return ``original_text`` in lower case, or None when it is null (None/NaN)."""
    return original_text.lower() if pd.notnull(original_text) else None

def tokenization(original_text):
    """Split text into word tokens with NLTK's ``word_tokenize``.

    Returns the list of tokens, or None when ``original_text`` is null (None/NaN).
    """
    if pd.isnull(original_text):
        return None
    tokens = word_tokenize(original_text)
    return tokens

def remove_stopwords(token_list):
    """Remove custom and NLTK English stop words from a list of tokens.

    Tokens are compared as-is against the module-level ``custom_stopwords``
    list and ``stopwords.words('english')``; relative order and duplicates of
    the remaining tokens are preserved.

    Returns the filtered list, or None when ``token_list`` is None.
    """
    if token_list is None:
        return None
    # Build the combined lookup once per call, as a set. Concatenating the two
    # lists inside the comprehension rebuilt the list for every token and then
    # scanned it linearly.
    excluded = set(custom_stopwords)
    excluded.update(stopwords.words('english'))
    return [word for word in token_list if word not in excluded]
    
# combine all the above steps into one function

def preprocessing(original_text):
    """Run the full cleaning pipeline on one article.

    Steps, in order: strip special characters, lower-case, tokenize, remove
    stop words. Returns the resulting token list, or None for null input
    (each step passes None through).
    """
    without_symbols = remove_special_characters(original_text)
    lowered = convert_lowercase(without_symbols)
    tokens = tokenization(lowered)
    return remove_stopwords(tokens)

In [142]:
# Store the cleaned token list of every article in a new column.
# Rows without article text get a null value, because preprocessing returns None for null input.

final['processed_data'] = final['ARTICLE_TEXT'].apply(preprocessing)

### NLP - Analysis

In [146]:
# Folder holding the positive / negative word dictionaries used for the sentiment scores

dict_folder_path = '../MasterDictionary'

# list the folder to confirm which dictionary files are available
os.listdir(dict_folder_path)

['negative-words.txt', 'positive-words.txt']

In [157]:
# making list of positive and negative words from the files

def _read_word_list(path):
    """Return the whitespace-separated words stored in the file at ``path``.

    The bytes are decoded explicitly (UTF-8 first, then Windows-1252) rather
    than with the platform's default text encoding, so the same word list is
    produced on every machine and a non-UTF-8 file does not raise
    UnicodeDecodeError on systems whose default encoding is UTF-8.
    """
    with open(path, 'rb') as file:
        raw_bytes = file.read()
    try:
        text = raw_bytes.decode('utf-8-sig')
    except UnicodeDecodeError:
        text = raw_bytes.decode('cp1252')
    return text.split()


neg_words_filepath = os.path.join(dict_folder_path,'negative-words.txt')
negative_words = _read_word_list(neg_words_filepath)

pos_words_filepath = os.path.join(dict_folder_path,'positive-words.txt')
positive_words = _read_word_list(pos_words_filepath)
    

In [188]:
# positive score

def calculate_positive_score(list_of_words):
    """Count the tokens that appear in the module-level ``positive_words`` dictionary.

    Every occurrence counts, so a positive word that appears twice adds 2.
    Returns an int, or None when ``list_of_words`` is None.
    """
    if list_of_words is None:
        return None
    # Set membership is constant-time; testing each token against the
    # positive_words list scanned the whole dictionary for every token.
    positive_lookup = set(positive_words)
    return sum(1 for word in list_of_words if word in positive_lookup)
    
# negative score

def calculate_negative_score(list_of_words):
    """Count the tokens that appear in the module-level ``negative_words`` dictionary.

    Every occurrence counts, so a negative word that appears twice adds 2.
    Returns an int, or None when ``list_of_words`` is None.
    """
    if list_of_words is None:
        return None
    # Set membership is constant-time; testing each token against the
    # negative_words list scanned the whole dictionary for every token.
    negative_lookup = set(negative_words)
    return sum(1 for word in list_of_words if word in negative_lookup)
    
# polarity score 
    
def calculate_polarity(pos_score, neg_score):
    """Polarity = (positive - negative) / (positive + negative + 0.000001).

    The small constant keeps the denominator non-zero when both scores are 0.
    Returns None when ``pos_score`` is null (None/NaN).
    """
    if pd.isnull(pos_score):
        return None
    difference = pos_score - neg_score
    total = pos_score + neg_score
    return difference / (total + 0.000001)
    
# subjectivity score 
    
def calculate_subjectivity(pos_score, neg_score, processed_data_list):
    """Subjectivity = (positive + negative) / (number of cleaned tokens + 0.000001).

    The small constant keeps the denominator non-zero for an empty token list.
    Returns None when ``pos_score`` is null (None/NaN); the token list is not
    touched in that case.
    """
    if pd.isnull(pos_score):
        return None
    sentiment_hits = pos_score + neg_score
    token_total = len(processed_data_list)
    return sentiment_hits / (token_total + 0.000001)
    


In [169]:
# Number of dictionary-positive tokens per article (null for rows without processed text)
final['POSITIVE SCORE'] = final['processed_data'].apply(calculate_positive_score)

In [177]:
# Number of dictionary-negative tokens per article (null for rows without processed text)
final['NEGATIVE SCORE'] = final['processed_data'].apply(calculate_negative_score)

In [183]:
# axis=1 makes apply hand over one row at a time, so both score columns are available together

final['POLARITY SCORE'] = final.apply(
    lambda row: calculate_polarity(row['POSITIVE SCORE'], row['NEGATIVE SCORE']),
    axis=1,
)

In [189]:
# Row-wise: subjectivity needs both scores and the cleaned token list of the same article
final['SUBJECTIVITY SCORE'] = final.apply(
    lambda row: calculate_subjectivity(
        row['POSITIVE SCORE'],
        row['NEGATIVE SCORE'],
        row['processed_data'],
    ),
    axis=1,
)

### Analysis of Readability

In [218]:
# average sentence length

def calculate_avg_sent_len(article_text):
    """Average number of words per sentence in ``article_text``.

    Words are counted on the text with special characters blanked out, so that
    punctuation tokens such as "," are not counted as words; sentences are
    counted on the untouched text.

    Returns 0 when no sentence is found and None when ``article_text`` is null.
    """
    if pd.isnull(article_text):
        return None
    word_total = len(word_tokenize(remove_special_characters(article_text)))
    sentence_total = len(sent_tokenize(article_text))
    return word_total / sentence_total if sentence_total > 0 else 0
        

In [220]:
# Words per sentence, computed from the raw article text (null for rows without text)
final['AVG SENTENCE LENGTH'] = final['ARTICLE_TEXT'].apply(calculate_avg_sent_len)

In [223]:
# the percentage of complex words
# we will determine if the word is complex on the basis of syllables
# if the number of syllables exceeds 2 (i.e. 3 or more), we treat the word as complex

# !pip install syllables

Collecting syllables
  Downloading syllables-1.0.7-py3-none-any.whl (15 kB)
Collecting cmudict<2.0.0,>=1.0.11 (from syllables)
  Downloading cmudict-1.0.13-py3-none-any.whl (939 kB)
                                              0.0/939.3 kB ? eta -:--:--
     --------------                        358.4/939.3 kB 10.9 MB/s eta 0:00:01
     -------------------------------       809.0/939.3 kB 10.2 MB/s eta 0:00:01
     -------------------------------------- 939.3/939.3 kB 9.9 MB/s eta 0:00:00
Collecting importlib-metadata<6.0.0,>=5.1.0 (from syllables)
  Downloading importlib_metadata-5.2.0-py3-none-any.whl (21 kB)
Collecting importlib-resources<6.0.0,>=5.10.1 (from cmudict<2.0.0,>=1.0.11->syllables)
  Downloading importlib_resources-5.12.0-py3-none-any.whl (36 kB)
Collecting zipp>=0.5 (from importlib-metadata<6.0.0,>=5.1.0->syllables)
  Downloading zipp-3.15.0-py3-none-any.whl (6.8 kB)
Installing collected packages: zipp, importlib-resources, importlib-metadata, cmudict, syllables
Succes

In [249]:
import syllables

def calculate_complex_percentage(article_text):
    """Percentage of words in ``article_text`` with three or more estimated syllables.

    Special characters are blanked out before tokenizing, so punctuation is
    not counted as a word.

    Returns 0 when the text contains no words and None when it is null.
    """
    if pd.isnull(article_text):
        return None
    tokens = word_tokenize(remove_special_characters(article_text))
    token_total = len(tokens)
    if token_total <= 0:
        return 0
    complex_total = sum(1 for token in tokens if syllables.estimate(token) >= 3)
    return (complex_total / token_total) * 100
        
        
        
# syllables.estimate(word) returns an estimate of the number of syllables in the word

In [250]:
# Share (in percent) of words with three or more estimated syllables, per article
final['PERCENTAGE OF COMPLEX WORDS'] = final['ARTICLE_TEXT'].apply(calculate_complex_percentage)

In [252]:
# fog index

def calculate_fog_index(average_words_per_sentence, percentage_complex_words):
    """Fog index: 0.4 * (average words per sentence + percentage of complex words)."""
    combined = average_words_per_sentence + percentage_complex_words
    return 0.4 * combined

In [253]:
# Row-wise: the fog index combines two columns that were computed per article above
final['FOG INDEX'] = final.apply(
    lambda row: calculate_fog_index(
        row['AVG SENTENCE LENGTH'],
        row['PERCENTAGE OF COMPLEX WORDS'],
    ),
    axis=1,
)

In [257]:
# The average number of words per sentence is the same quantity as AVG SENTENCE LENGTH,
# so reuse the column computed earlier instead of tokenizing every article a second time.

final['AVG NUMBER OF WORDS PER SENTENCE'] = final['AVG SENTENCE LENGTH']

In [260]:
# complex word count 

def calculate_complex_words(article_text):
    """Number of words in ``article_text`` with three or more estimated syllables.

    Special characters are blanked out before tokenizing, so punctuation is
    not counted as a word. Returns None when ``article_text`` is null.
    """
    if pd.isnull(article_text):
        return None
    tokens = word_tokenize(remove_special_characters(article_text))
    return sum(1 for token in tokens if syllables.estimate(token) >= 3)


In [261]:
# Number of words with three or more estimated syllables, per article
final['COMPLEX WORD COUNT'] = final['ARTICLE_TEXT'].apply(calculate_complex_words)

In [282]:
# total cleaned words

def calculate_word_count(article_text):
    """Count the words left after cleaning and removing NLTK English stop words.

    The text is stripped of special characters, lower-cased and tokenized.
    Only the NLTK English list is applied here, not ``custom_stopwords``.

    Returns an int, or None when ``article_text`` is null.
    """
    if pd.isnull(article_text):
        return None
    cleaned_text = convert_lowercase(remove_special_characters(article_text))
    # A set gives constant-time membership tests; the list returned by
    # stopwords.words() was scanned linearly for every token.
    stop_words = set(stopwords.words('english'))
    return sum(1 for word in word_tokenize(cleaned_text) if word not in stop_words)

In [283]:
# Number of tokens left per article after cleaning and removing NLTK English stop words
final['WORD COUNT'] = final['ARTICLE_TEXT'].apply(calculate_word_count)

In [279]:
# syllable per word

def syllable_count(article_text):
    """Estimate the syllable count of every non-stop-word in ``article_text``.

    The text is stripped of special characters, lower-cased and tokenized, and
    NLTK English stop words are removed.

    Returns a list with one estimated syllable count per remaining word, in
    text order, or None when ``article_text`` is null.
    """
    if pd.isnull(article_text):
        return None
    cleaned_text = convert_lowercase(remove_special_characters(article_text))
    # A set gives constant-time membership tests; the list returned by
    # stopwords.words() was scanned linearly for every token.
    stop_words = set(stopwords.words('english'))
    return [
        syllables.estimate(word)
        for word in word_tokenize(cleaned_text)
        if word not in stop_words
    ]


In [280]:
# Each cell receives the list of per-word syllable estimates returned by syllable_count.
# NOTE(review): the other metric columns hold one number per article - confirm whether a
# single value (e.g. the average) is expected here instead of a list.
final['SYLLABLE PER WORD'] = final['ARTICLE_TEXT'].apply(syllable_count)

In [287]:
# avg word length

def calculate_avg_word_length(word_list):
    """Mean number of characters per word in ``word_list``.

    Returns 0 for an empty list and None when ``word_list`` is None.
    """
    if word_list is None:
        return None
    word_total = len(word_list)
    if word_total <= 0:
        return 0
    character_total = sum(map(len, word_list))
    return character_total / word_total

In [288]:
# Mean token length, computed on processed_data (i.e. after stop-word removal)
final['AVG WORD LENGTH'] = final['processed_data'].apply(calculate_avg_word_length)

In [289]:
final

Unnamed: 0,URL_ID,ARTICLE_TITLE,ARTICLE_TEXT,URL,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX,AVG NUMBER OF WORDS PER SENTENCE,COMPLEX WORD COUNT,WORD COUNT,SYLLABLE PER WORD,PERSONAL PRONOUNS,AVG WORD LENGTH,processed_data
0,37,,“If anything kills over 10 million people in t...,https://insights.blackcoffer.com/ai-in-healthc...,3.0,11.0,-0.571429,0.245614,22.800000,29.824561,21.049825,22.800000,34.0,68.0,"[3, 1, 1, 2, 2, 1, 3, 2, 3, 2, 2, 1, 3, 3, 1, ...",,7.701754,"[kills, 10, people, decades, highly, infectiou..."
1,38,What if the Creation is Taking Over the Creator?,"Human minds, a fascination in itself carrying ...",https://insights.blackcoffer.com/what-if-the-c...,8.0,5.0,0.230769,0.419355,22.250000,21.348315,17.439326,22.250000,19.0,48.0,"[2, 1, 4, 2, 3, 3, 3, 2, 1, 5, 2, 2, 3, 2, 3, ...",,7.322581,"[human, minds, fascination, carrying, potentia..."
2,39,What Jobs Will Robots Take From Humans in The ...,AI is rapidly evolving in the employment secto...,https://insights.blackcoffer.com/what-jobs-wil...,7.0,2.0,0.555555,0.118421,19.000000,32.236842,20.494737,19.000000,49.0,93.0,"[1, 3, 3, 3, 2, 5, 2, 3, 3, 3, 3, 4, 4, 3, 2, ...",,7.407895,"[rapidly, evolving, employment, sector, matter..."
3,40,Will Machine Replace The Human in the Future o...,“Anything that could give rise to smarter-than...,https://insights.blackcoffer.com/will-machine-...,6.0,0.0,1.000000,0.285714,23.000000,19.565217,17.026087,23.000000,9.0,28.0,"[3, 1, 1, 1, 2, 2, 5, 1, 4, 5, 1, 3, 4, 4, 2, ...",,7.333333,"[give, rise, smarter, human, intelligence, for..."
4,41,Will AI Replace Us or Work With Us?,“Machine intelligence is the last invention th...,https://insights.blackcoffer.com/will-ai-repla...,1.0,0.0,0.999999,0.200000,13.000000,30.769231,17.507692,13.000000,4.0,8.0,"[3, 5, 1, 3, 4, 2, 1, 1]",,8.000000,"[machine, intelligence, invention, humanity, m..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109,146,Blockchain for Payments,between having a tight budget and being seriou...,https://insights.blackcoffer.com/blockchain-fo...,1.0,2.0,-0.333333,0.081081,17.428571,7.377049,9.922248,17.428571,9.0,60.0,"[1, 2, 3, 2, 1, 2, 4, 2, 2, 1, 1, 2, 2, 2, 1, ...",,6.135135,"[tight, budget, stretched, thin, dangerously, ..."
110,147,The future of Investing,An investment is a resource or thing procured ...,https://insights.blackcoffer.com/the-future-of...,1.0,0.0,0.999999,0.032258,23.333333,30.000000,21.333333,23.333333,21.0,36.0,"[3, 3, 1, 3, 4, 3, 1, 4, 6, 3, 1, 2, 3, 3, 4, ...",,7.225806,"[investment, resource, thing, procured, object..."
111,148,Big Data Analytics in Healthcare,Quality and affordable healthcare is a vision ...,https://insights.blackcoffer.com/big-data-anal...,4.0,2.0,0.333333,0.250000,14.500000,29.310345,17.524138,14.500000,17.0,30.0,"[3, 4, 3, 2, 3, 2, 2, 1, 1, 3, 3, 1, 3, 5, 4, ...",,8.083333,"[quality, affordable, healthcare, vision, gove..."
112,149,Business Analytics In The Healthcare Industry,Analytics is a statistical scientific process ...,https://insights.blackcoffer.com/business-anal...,2.0,0.0,1.000000,0.068966,23.500000,38.297872,24.719149,23.500000,18.0,29.0,"[4, 4, 3, 2, 4, 3, 3, 2, 1, 2, 3, 4, 2, 1, 4, ...",,8.379310,"[analytics, statistical, scientific, process, ..."


### OUTPUT

In [290]:
# Desired output format: keep only the columns of the output template.
# drop() returns a new frame and errors='ignore' skips columns that are already
# gone, so re-running this cell no longer raises a KeyError.

final = final.drop(columns=['ARTICLE_TITLE', 'ARTICLE_TEXT', 'processed_data'], errors='ignore')

In [293]:
final.head()

Unnamed: 0,URL_ID,URL,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX,AVG NUMBER OF WORDS PER SENTENCE,COMPLEX WORD COUNT,WORD COUNT,SYLLABLE PER WORD,PERSONAL PRONOUNS,AVG WORD LENGTH
0,37,https://insights.blackcoffer.com/ai-in-healthc...,3.0,11.0,-0.571429,0.245614,22.8,29.824561,21.049825,22.8,34.0,68.0,"[3, 1, 1, 2, 2, 1, 3, 2, 3, 2, 2, 1, 3, 3, 1, ...",,7.701754
1,38,https://insights.blackcoffer.com/what-if-the-c...,8.0,5.0,0.230769,0.419355,22.25,21.348315,17.439326,22.25,19.0,48.0,"[2, 1, 4, 2, 3, 3, 3, 2, 1, 5, 2, 2, 3, 2, 3, ...",,7.322581
2,39,https://insights.blackcoffer.com/what-jobs-wil...,7.0,2.0,0.555555,0.118421,19.0,32.236842,20.494737,19.0,49.0,93.0,"[1, 3, 3, 3, 2, 5, 2, 3, 3, 3, 3, 4, 4, 3, 2, ...",,7.407895
3,40,https://insights.blackcoffer.com/will-machine-...,6.0,0.0,1.0,0.285714,23.0,19.565217,17.026087,23.0,9.0,28.0,"[3, 1, 1, 1, 2, 2, 5, 1, 4, 5, 1, 3, 4, 4, 2, ...",,7.333333
4,41,https://insights.blackcoffer.com/will-ai-repla...,1.0,0.0,0.999999,0.2,13.0,30.769231,17.507692,13.0,4.0,8.0,"[3, 5, 1, 3, 4, 2, 1, 1]",,8.0


In [294]:
# Write the result one folder above the notebook, without the DataFrame index.
# NOTE(review): SYLLABLE PER WORD holds Python lists, which are written as their text form,
# and PERSONAL PRONOUNS is not populated by any cell shown here - confirm both are intended.
final.to_csv('../Final_Output.csv',index=False)