This is sample web-crawling code. In this notebook, the links to articles published by The Print on each of the six selected topics are used to fetch the article text. The articles are then added to a dataframe and saved to disk.

In [None]:
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import re
from selenium import webdriver
from time import sleep
import time
import random
import pickle
import sys

In [None]:
#Header object
#Request headers sent with every page fetch; link_to_soup passes this dict to Request(..., headers=hdr)
hdr = {'User-Agent': 'Mozilla/5.0'}

In [None]:
#Defining a function that takes a link and returns the soup version of the html page
def link_to_soup(link_pg):
    """Download a web page and parse it into a BeautifulSoup tree.

    Args:
        link_pg: URL of the page to fetch.

    Returns:
        The BeautifulSoup object built from the response body.

    Any error raised by ``urlopen`` (network or HTTP failure) propagates to
    the caller.
    """
    #The module-level ``hdr`` dict supplies the User-Agent header
    req = Request(link_pg, headers=hdr)

    #The context manager closes the HTTP response once the page has been parsed,
    #so a long crawl does not accumulate open connections
    with urlopen(req) as page:
        #NOTE(review): no parser is named here, so BeautifulSoup picks one itself;
        #consider pinning it (e.g. "html.parser") so results do not depend on
        #which parsers are installed - confirm before changing
        soup_pg = BeautifulSoup(page)

    return soup_pg

In [None]:
#Defining a function for regex pattern matching
def pattern_finder(pattern, text):
    """Return all case-insensitive matches of ``pattern`` in ``text``.

    Args:
        pattern: regular expression to search for.
        text: string to search in.

    Returns:
        The list produced by ``re.findall``; when nothing matches, the
        one-element sentinel list ``["no match"]`` is returned instead.
    """
    #re.findall gives an empty (falsy) list when there is no match,
    #in which case the sentinel list takes its place
    return re.findall(pattern, text, re.IGNORECASE) or ["no match"]

In [None]:
#Function to find the tags in an article
def tags_finder(tags_html):
    """Extract tag names from the HTML of an article's tag list.

    Args:
        tags_html: object whose string form is the tag markup.

    Returns:
        Whatever ``pattern_finder`` returns for runs of word characters and
        spaces found between ``>`` and ``<``.
    """
    tag_markup = str(tags_html)
    return pattern_finder(r">([\w ]+)<", tag_markup)

In [None]:
#To check if our topic is one of the tags in the article; also extracting the article's title, tag, and description
def topic_article_checker(soup_page, link, topic):
    """Decide whether an article is about ``topic`` and extract its header fields.

    If the page has a tag container (``div.td-post-source-tags``), the tags
    are compared with the topic's keywords. Otherwise the article's title and
    its link are searched for the keywords instead.

    Args:
        soup_page: parsed article page; it must contain a
            ``<header class="td-post-title">`` element.
        link: URL of the article.
        topic: one of the keys of ``topic_keyword_dict`` below.

    Returns:
        A tuple ``(is_on_topic, title, desc, date)``. Each of the three
        strings is "no match" when the corresponding element yields no text.

    Raises:
        ValueError: if ``topic`` is not one of the known topics.
        AttributeError: if the page has no ``td-post-title`` header.
    """

    #Creating a dictionary to map topics and keywords
    topic_keyword_dict = {"farm laws": ['farm', 'agri', 'crop', 'msp', 'paddy'],
                          "rafale": ['rafale', 'dassault', 'french', 'france'],
                          "article 370": ['article 370', 'srinagar', 'kashmir', 'j&k', 'j&amp;', 'mufti', 'abdullah'],
                          "sabarimala": ['sabarimala', 'ayyappa'],
                          "section 377": ['section 377', 'lgbt', 'gay'],
                          "caa": ["caa", "nrc", "citizen", "shaheen"]
                         }

    #Validating the topic up front, so that an unknown topic fails in the same way
    #whether or not the article has tags
    keywords_list = topic_keyword_dict.get(topic)
    if keywords_list is None:
        raise ValueError("Invalid topic")

    #Text sitting between two HTML tags
    pattern = r">([^<>]+)<"

    #Finding the header in the article
    header_obj = soup_page.find("header", class_ = "td-post-title")

    #Getting the article's title
    title_html = header_obj.find("h1", class_ = "entry-title")
    title = pattern_finder(pattern, str(title_html))[0]

    #Getting the article's description
    desc_html = header_obj.find("h2", class_ = "td-post-sub-title")
    desc = pattern_finder(pattern, str(desc_html))[0]

    #The date can also be captured here (the text that precedes " IST")
    date_html = header_obj.find("span", class_ = "update_date")
    date = pattern_finder(r">([^<>]+) IST<", str(date_html))[0]

    #We can use the article's tags from the website, if the article has tags
    tags_container = soup_page.find("div", class_ = "td-post-source-tags")

    if tags_container is None:
        #The article doesn't have tags, so the title and the link are checked instead.
        #An explicit None check replaces the former bare "except:", which would also
        #have hidden unrelated errors
        title_lower = title.lower()
        link_lower = link.lower()
        on_topic = (any(keyw in title_lower for keyw in keywords_list) or
                    any(keyw in link_lower for keyw in keywords_list))
        return on_topic, title, desc, date

    #Getting the tags from the article
    tags_html = tags_container.find_all("li")
    tags = pattern_finder(pattern, str(tags_html))

    #The article is on the current topic if any tag contains any of the keywords
    on_topic = any(keyw in tag.lower() for tag in tags for keyw in keywords_list)
    return on_topic, title, desc, date


In [None]:
#Defining a function to remove leading and trailing white spaces
def remove_whitespace_trail_lead(text):
    """Strip spaces and tabs from the end and the start of ``text``.

    Only spaces and tabs are removed; line breaks are retained.

    Args:
        text: string to clean.

    Returns:
        The cleaned string.
    """
    #Trailing spaces/tabs are removed first, then the leading ones
    without_trailing = re.sub(r"[ \t]+$", "", text)
    return re.sub(r"^[ \t]+", "", without_trailing)

In [None]:
#Defining a function to take an article's soup page and add the article to the dataframe
def article_adder(soup_page, link, topic):
    """Extract one article from its parsed page and append it to ``theprint_df``.

    The module-level dataframe ``theprint_df`` is modified in place: a new row
    (title, link, date, authors, topic, article text) is added when the
    article is new and on topic.

    Args:
        soup_page: parsed article page.
        link: URL of the article.
        topic: topic currently being collected.

    Returns:
        1 if the article was added, 0 if it is not about ``topic``,
        -1 if ``link`` is already present in the dataframe.

    Raises:
        ValueError: if no article text could be extracted from the page.
        AttributeError: if the page has no ``td-post-content`` container.
    """

    #Checking if the article is already in the dataframe.
    #regex=False makes the URL match literally; as a regular expression, characters
    #such as "." or "?" in a link would be interpreted as metacharacters
    if theprint_df['Link'].str.contains(link, regex=False).any():
        print("Link is already present in the dataframe")
        print("Link to article: ", link)
        return -1

    #Checking if the article has the relevant topic tag
    check, title, desc, date = topic_article_checker(soup_page, link, topic)

    if not check:
        print("Article is not about current topic")
        print("Link to article: ", link)
        return 0

    #Deleting the "contribution" box, if the page has one
    div_obj = soup_page.find("div", class_ = "post_contribute")
    if div_obj is not None:
        div_obj.clear()

    #Text sitting between two HTML tags
    pattern = r">([^<>]+)<"

    #Finding the author of the article
    author_html = soup_page.find("a", rel = "author")
    authors = pattern_finder(pattern, str(author_html))[0]

    #Extracting the article text
    #Note that this is not the full name of the class, but BeautifulSoup doesn't require the full class name
    article_body = soup_page.find("div", class_ = "td-post-content")

    #Finding the paragraphs within the article body
    article_paras = article_body.find_all(["p", "h3"], class_ = None, recursive=True)

    #Lower-case markers of paragraphs that only link to other articles or videos
    link_markers = ("also read", "read |", "read|", "read :", "read:",
                    "watch |", "watch|", "watch :", "watch:",
                    "see |", "see|", "also see:", "also see :")

    #Removing the paras that are links to other articles and the paras that contain videos embedded
    article_text_paras = []
    for para in article_paras:
        para_html = str(para)
        para_lower = para_html.lower()
        if any(marker in para_lower for marker in link_markers):
            continue
        if "<iframe" in para_html:
            continue
        article_text_paras.append(para)

    #Extracting the text from these paras
    text_paras = pattern_finder(pattern, str(article_text_paras))

    #Removing leading and trailing whitespaces from the text.
    #', ' is the separator that str() places between the items of the list
    text_paras_cleaned = [remove_whitespace_trail_lead(t) for t in text_paras if t != ', ']
    desc_cleaned = remove_whitespace_trail_lead(desc)

    #Failing explicitly when the page yielded no text at all, so that the caller records
    #the link as a problem instead of storing the "no match" sentinel as an article
    if not text_paras_cleaned or text_paras_cleaned == ["no match"]:
        raise ValueError("No article text could be extracted")

    #Checking for a city/location name ("NEW DELHI: ...") at the start of the first paragraph
    #and, failing that, of the second paragraph; only the text after the colon is kept.
    #The raw string is the same regular expression as before, without invalid escape sequences.
    #NOTE(review): pattern_finder matches case-insensitively, so [A-Z] also accepts
    #lower-case letters here - confirm that this is intended
    dateline_pattern = r"^[A-Z]{1,15}[A-Za-z\s ()\\/]{0,15}:([^<>]*$)"

    #min() keeps single-paragraph articles from raising an IndexError
    for para_idx in range(min(2, len(text_paras_cleaned))):
        result = pattern_finder(dateline_pattern, text_paras_cleaned[para_idx])
        if result[0] != "no match":
            text_paras_cleaned[para_idx] = result[0]
            break

    #Merging the string items in the list of text paras
    text_paras_joined = ' '.join(text_paras_cleaned)

    #Making sure that there is a description in the article
    if desc_cleaned != "no match":
        #The description is put in front of the text, joined without a space
        article_text = desc_cleaned + text_paras_joined
    else:
        #In this branch, there's no description
        article_text = text_paras_joined

    #Setting the index value where the new row is to be inserted
    if theprint_df.empty:
        row_id = 0
    else:
        row_id = theprint_df.index[-1] + 1

    #Adding this article's details to the dataframe
    theprint_df.loc[row_id] = [title, link, date, authors, topic, article_text]

    return 1


Let's load the saved article links from disk.

In [None]:
def links_unpickle(topic):
    """Load the pickled list of article links stored for ``topic``.

    Args:
        topic: one of the six topic names used as keys below. For any other
            value the lookup yields None and ``open`` fails.

    Returns:
        The object unpickled from the topic's file (the list of links).
        Its length is also printed.
    """

    #Topic name -> file (in the working directory) that holds the topic's links
    files_by_topic = {
        "farm laws": 'links to articles_farm_laws.txt',
        "rafale": 'links to articles_rafale.txt',
        "article 370": 'links to articles_article_370.txt',
        "sabarimala": 'links to articles_sabarimala.txt',
        "section 377": 'links to articles_section_377.txt',
        "caa": 'links to articles_caa.txt',
    }
    pickle_path = files_by_topic.get(topic)

    #Unpickling the relevant file
    #Note: pickle.load can run arbitrary code, so only files created by this project should be loaded
    with open(pickle_path, "rb") as handle:
        links = pickle.load(handle)

    summary = "Number of links on the {} topic is {}.".format(topic, len(links))
    print(summary)

    return links
    

We can extract articles from these links. Let's define a function for this.

In [None]:
def link_to_article(links_to_add, topic):
    """Fetch every link and try to add its article to the dataframe.

    Each link is downloaded with ``link_to_soup`` and handed to
    ``article_adder``. After a link has been processed without an exception,
    the loop sleeps for a random 3 to 15 seconds and prints the running
    count. Links that raise are reported and collected, and the loop moves on.

    Args:
        links_to_add: iterable of article URLs.
        topic: topic passed on to ``article_adder``.

    Returns:
        A tuple ``(other_topics, exceptions_list)``: the links whose article
        was not about ``topic``, and the links that raised an exception.
    """

    #Counters and collectors for the four possible outcomes
    added_count = 0
    duplicate_count = 0
    off_topic_links = []
    failed_links = []

    for url in links_to_add:

        try:
            page = link_to_soup(url)

            #article_adder returns 1 (added), 0 (other topic) or -1 (already present)
            outcome = article_adder(page, url, topic)

            if outcome == 1:
                added_count += 1
            elif outcome == 0:
                off_topic_links.append(url)
            else:
                duplicate_count += 1

            #Pausing between requests
            sleep(random.randint(3, 15))

            print("Articles added: ", added_count)

        except Exception as err:

            #Remembering the link that failed
            failed_links.append(url)

            print("Ran into a problem")

            #Printing the error type and the line that threw the exception
            print(type(err), 'Line:', err.__traceback__.tb_lineno)
            print("Error object: ", err)

    print("Number of articles added: ", added_count)
    print("Number of articles that were already in the dataframe: ", duplicate_count)

    return off_topic_links, failed_links

In [None]:
#Creating an empty dataframe to store articles
#article_adder appends one row per article to this module-level dataframe, in this column order:
#title, link, date, authors, topic, article text
theprint_df = pd.DataFrame(columns=['Title', 'Link', 'Date', 'Authors', 'Topic', 'Article'])
print(theprint_df.shape)
theprint_df.head()

(0, 6)


Unnamed: 0,Title,Link,Date,Authors,Topic,Article


Let's unpickle the links topic-wise and add the articles to our dataframe.

#### 1. Farm Laws

In [None]:
#Setting the topic
topic = 'farm laws'

#Unpickling the links
links_list = links_unpickle(topic)

Number of links on the farm laws topic is 1480.


In [None]:
#Removing duplicate links
#dict.fromkeys keeps the first occurrence of every link in its original position. A set does not
#guarantee any order, so the slices taken from links_list further down could select different
#links on every kernel run
links_list = list(dict.fromkeys(links_list))

#Dropping links that have the term 'video' in them
links_list = [url for url in links_list if 'video' not in url]
print("Number of links after cleaning: ", len(links_list))

Number of links after cleaning:  1468


Adding these articles to our dataframe.

In [None]:
#Collecting the links that were not added to the dataframe
topic_other_list_1, exceptions_links_list_1 = link_to_article(links_list[:15], topic)

Articles added:  1
Articles added:  2
Article is not about current topic
Link to article:  https://theprint.in/india/ayodhya-to-covid-india-is-marching-forward-says-pm-modi-in-open-letter-to-country/432178/
Articles added:  2
Article is not about current topic
Link to article:  https://theprint.in/india/governance/to-return-or-not-kashmiris-driven-out-of-uttarakhand-colleges-dont-know-what-to-do/202143/
Articles added:  2
Article is not about current topic
Link to article:  https://theprint.in/thought-shot/prasoon-joshi-on-being-fair-to-domestic-helpers-valmik-thapar-blames-china-for-ignoring-nature/414726/
Articles added:  2
Article is not about current topic
Link to article:  https://theprint.in/economy/reduce-cost-of-business-reform-land-acquisition-bjp-suggests-plan-for-post-covid-economy/407426/
Articles added:  2
Article is not about current topic
Link to article:  https://theprint.in/india/governance/why-the-cbi-failed-to-prove-its-sohrabuddin-fake-encounter-case/167434/
Article

In [None]:
theprint_df

Unnamed: 0,Title,Link,Date,Authors,Topic,Article
0,This Mohali village has no mobile or internet ...,https://theprint.in/health/in-mohali-poor-heal...,"6 June, 2021 1:22 pm",Ananya Bhardwaj,farm laws,The Mohali administration has put up all 341 v...
1,"AAP supports farmers’ Bharat Bandh call too, a...",https://theprint.in/india/aap-supports-farmers...,"6 December, 2020 4:05 pm",ANI,farm laws,Delhi Environment Minister and AAP leader Gopa...
2,Modi govt must treat farmers as businessmen &a...,https://theprint.in/opinion/modi-govt-must-tre...,"13 September, 2019 10:15 am",Shweta Saini,farm laws,If Narendra Modi wants to double farmers’ inco...
3,"Modi’s boat ride got the coverage, but on TV, ...",https://theprint.in/opinion/telescope/modis-bo...,"3 December, 2020 8:26 am",Shailaja Bajpai,farm laws,When a Zee News reporter tried to explain a ga...
4,"Improve farm inputs, equip panchayats to verif...",https://theprint.in/opinion/improve-farm-input...,"7 December, 2020 10:35 am",Srijan Pal Singh,farm laws,In my project to evolve the PURA policy with f...
5,Opposition has finally come together on farm l...,https://theprint.in/opinion/opposition-has-fin...,"28 September, 2020 9:40 am",Zainab Sikander,farm laws,Speaking boldly on Twitter won't work. Opposit...


In [None]:
print(theprint_df['Link'][5])
print(theprint_df['Article'][5])

https://theprint.in/opinion/opposition-has-finally-come-together-on-farm-legislation-but-that-wont-be-enough/511421/
Speaking boldly on Twitter won't work. Opposition can learn from anti-CAA protests ahead of Bihar and West Bengal assembly elections.E ighteen opposition parties and 31 farmer organisations have come together to oppose Narendra Modi government’s apparently ill-thought out farm laws, which they wanted a select committee to review before passing in the Rajya Sabha due to the evident lacunae in them. But the opposition was bulldozed. Some experts claim that these legislation can eventually lead to formations of monopolies, oligopolies and cartels. But how will the Opposition convince the Indian public about the ill-effects of these farm sector reforms? The opposition parties may have come together – a rare occurrence nowadays – but can they sustain, be effective and mobilise mass uprising? Speaking boldly on Twitter, broadcasting self-recorded videos from central hall and g

In [None]:
#Collecting the links that were not added to the dataframe
#NOTE(review): this rebinds topic_other_list_1 and exceptions_links_list_1, so the lists returned
#by the earlier call for links_list[:15] are discarded - confirm whether they should be kept
topic_other_list_1, exceptions_links_list_1 = link_to_article(links_list[15:], topic)

Article is not about current topic
Link to article:  https://theprint.in/environment/why-punjabs-farmers-continue-to-burn-stubble-and-poison-delhis-air/319601/
Articles added:  0
Article is not about current topic
Link to article:  https://theprint.in/talk-point/demonetisation-passion-play-pm/14926/
Articles added:  0
Article is not about current topic
Link to article:  https://theprint.in/health/toxic-air-from-farm-fires-could-make-north-indias-covid-fight-deadlier/535904/
Articles added:  0
Articles added:  1
Articles added:  2
Articles added:  3
Articles added:  4
Article is not about current topic
Link to article:  https://theprint.in/india/protection-under-new-land-orders-same-as-himachal-and-uttarakhand-laws-jk-govt-says/535976/
Articles added:  4
Article is not about current topic
Link to article:  https://theprint.in/politics/bjps-big-bengal-plan-for-october-amit-shah-puja-visit-womens-defence-training-protests/515546/
Articles added:  4
Article is not about current topic
Link 

Articles added:  36
Article is not about current topic
Link to article:  https://theprint.in/india/by-what-date-will-chinese-troops-be-kicked-out-of-india-rahul-gandhi-asks-pm-modi/527548/
Articles added:  36
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/india-takes-stand-for-harassed-wives-and-security-forces-get-free-hand-in-valley/72007/
Articles added:  36
Articles added:  37
Articles added:  38
Article is not about current topic
Link to article:  https://theprint.in/opinion/politically-correct/rss-vajpayee-pracharak-narendra-modi/418509/
Articles added:  38
Article is not about current topic
Link to article:  https://theprint.in/india/tamil-culture-essential-for-indias-future-says-rahul-gandhi-at-jallikattu-event-in-tn/585500/
Articles added:  38
Article is not about current topic
Link to article:  https://theprint.in/politics/if-they-have-trouble-with-bharat-mata-ki-jai-bihar-has-trouble-with-them-modi-slams-rjd/536131/
Articles added:  38
Ar

Articles added:  85
Articles added:  86
Articles added:  87
Article is not about current topic
Link to article:  https://theprint.in/science/amazon-the-lungs-of-the-planet-is-on-fire-here-are-5-things-you-need-to-know/281055/
Articles added:  87
Articles added:  88
Article is not about current topic
Link to article:  https://theprint.in/india/governance/west-bengal-plans-new-law-to-tackle-fake-news-on-social-media/70857/
Articles added:  88
Articles added:  89
Article is not about current topic
Link to article:  https://theprint.in/india/delhi-court-denies-deep-sidhus-custody-to-police-in-red-fort-violence-case-filed-by-asi/642522/
Articles added:  89
Article is not about current topic
Link to article:  https://theprint.in/yourturn/reader-view-time-for-govt-to-fulfill-its-minimum-government-maximum-governance-promise/575158/
Articles added:  89
Article is not about current topic
Link to article:  https://theprint.in/world/coronavirus-should-finally-make-us-act-on-illegal-wildlife-trade

Articles added:  127
Article is not about current topic
Link to article:  https://theprint.in/economy/indias-budget-could-widen-to-3-8-of-gdp/346332/
Articles added:  127
Article is not about current topic
Link to article:  https://theprint.in/india/aap-says-cm-arvind-kejriwals-movement-still-restricted-delhi-police-denies-claim/563448/
Articles added:  127
Articles added:  128
Articles added:  129
Articles added:  130
Articles added:  131
Article is not about current topic
Link to article:  https://theprint.in/opinion/women-left-behind-rajasthan-health-insurance-scheme-has-a-gender-gap-study/636101/
Articles added:  131
Articles added:  132
Article is not about current topic
Link to article:  https://theprint.in/india/3-year-old-sujith-trapped-tamil-nadu-borewell-80-hours-dies-body-pulled-out/312712/
Articles added:  132
Articles added:  133
Article is not about current topic
Link to article:  https://theprint.in/talk-point/talk-point-gujarats-income-growth-stayed-same/15436/
Articles

Articles added:  169
Articles added:  170
Articles added:  171
Articles added:  172
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-set-for-major-overhaul-many-states-likely-to-get-new-party-chiefs-ahead-of-ls-polls/46502/
Articles added:  172
Article is not about current topic
Link to article:  https://theprint.in/opinion/politricks/silence-of-the-gandhis-vs-vocal-ashok-gehlot-bhupesh-baghel-shows-a-new-trend-in-congress/550512/
Articles added:  172
Article is not about current topic
Link to article:  https://theprint.in/opinion/having-limited-mahatma-gandhi-to-sanitation-modi-is-ignoring-his-central-message/262256/
Articles added:  172
Article is not about current topic
Link to article:  https://theprint.in/politics/how-altaf-bukhari-bjps-b-team-in-jk-is-surviving-without-selling-dreams-about-article-370/708862/
Articles added:  172
Article is not about current topic
Link to article:  https://theprint.in/india/education/why-defence-minister-rajna

Articles added:  200
Articles added:  201
Article is not about current topic
Link to article:  https://theprint.in/world/trump-sought-help-of-chinas-xi-to-win-2020-re-election-former-nsa-bolton-writes-in-book/443730/
Articles added:  201
Article is not about current topic
Link to article:  https://theprint.in/india/security-tightened-notice-to-farmers-singhu-ghazipur-still-tense-2-days-after-violence/594175/
Articles added:  201
Articles added:  202
Article is not about current topic
Link to article:  https://theprint.in/politics/kerala-governor-arif-khan-criticises-farm-laws-reads-out-anti-centre-remarks-in-assembly/581925/
Articles added:  202
Articles added:  203
Articles added:  204
Articles added:  205
Articles added:  206
Articles added:  207
Article is not about current topic
Link to article:  https://theprint.in/opinion/jinnah-once-famously-called-amu-the-arsenal-of-muslim-india/57174/
Articles added:  207
Articles added:  208
Article is not about current topic
Link to article:

Articles added:  239
Articles added:  240
Articles added:  241
Articles added:  242
Article is not about current topic
Link to article:  https://theprint.in/sg-writings-on-the-wall/writings-on-the-wall-in-a-tearing-hurry-ana/544001/
Articles added:  242
Article is not about current topic
Link to article:  https://theprint.in/india/lok-sabha-passes-retrospective-tax-bill-amid-protests-by-opposition-over-pegasus-other-issues/710463/
Articles added:  242
Articles added:  243
Article is not about current topic
Link to article:  https://theprint.in/yourturn/subscriberwrites-expression-of-dissent-cannot-occur-on-the-streets-of-delhi-but-in-the-voting-booth/663285/
Articles added:  243
Articles added:  244
Article is not about current topic
Link to article:  https://theprint.in/politics/mamata-indicates-west-bengal-will-implement-pm-kisan-scheme-says-govt-has-sought-farmers-data/579404/
Articles added:  244
Articles added:  245
Article is not about current topic
Link to article:  https://thep

Articles added:  279
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/primetime/kal-yug-on-zee-news-derek-obriens-acronym-for-fdi-sasikalas-return-on-india-today/601560/
Articles added:  279
Article is not about current topic
Link to article:  https://theprint.in/national-interest/when-you-cry-havoc-but-leash-the-dogs-of-war/5558/
Articles added:  279
Article is not about current topic
Link to article:  https://theprint.in/india/manish-sisodia-visits-ghazipur-border-to-check-arrangements-made-by-delhi-govt-for-farmers/594557/
Articles added:  279
Articles added:  280
Article is not about current topic
Link to article:  https://theprint.in/india/kashmir-battles-drugs-now-cases-rise-by-1000-in-3-years-at-just-one-hospital/266829/
Articles added:  280
Article is not about current topic
Link to article:  https://theprint.in/india/governance/343-new-job-roles-empowered-panels-minister-lays-out-modi-govts-skills-scheme-plans/612424/
Articles added:  280
Art

Articles added:  324
Articles added:  325
Article is not about current topic
Link to article:  https://theprint.in/thought-shot/ashok-gulatis-onion-fiasco-fix-where-are-women-bankers-asks-tamal-bandopadhyay/339328/
Articles added:  325
Articles added:  326
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-not-an-election-winning-machine-but-movement-to-win-hearts-modi-on-party-foundation-day/634926/
Articles added:  326
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/last-laughs-the-dormant-lokayukta-bill-and-trinamool/55495/
Articles added:  326
Articles added:  327
Articles added:  328
Articles added:  329
Articles added:  330
Articles added:  331
Articles added:  332
Article is not about current topic
Link to article:  https://theprint.in/world/who-is-navdeep-bains-khalistani-sympathiser-who-quit-trudeau-cabinet-for-his-family/587278/
Articles added:  332
Article is not about current topic
Link to article:  htt

Article is not about current topic
Link to article:  https://theprint.in/india/a-multi-crore-insurance-fraud-that-fed-on-poor-cancer-patients-in-haryana/235771/
Articles added:  363
Article is not about current topic
Link to article:  https://theprint.in/india/governance/after-complaints-by-maneka-environment-ministry-may-replace-animal-welfare-board-head/99424/
Articles added:  363
Articles added:  364
Article is not about current topic
Link to article:  https://theprint.in/theprint-essential/why-a-section-of-the-land-acquisition-act-turned-into-a-big-judicial-controversy/305787/
Articles added:  364
Articles added:  365
Article is not about current topic
Link to article:  https://theprint.in/opinion/modi-govts-labour-reform-was-long-awaited-but-it-alone-cant-change-face-of-industry/515695/
Articles added:  365
Articles added:  366
Articles added:  367
Articles added:  368
Articles added:  369
Articles added:  370
Article is not about current topic
Link to article:  https://theprint.i

Articles added:  410
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-thrown-2019-elections-derek-obrien/32328/
Articles added:  410
Articles added:  411
Article is not about current topic
Link to article:  https://theprint.in/politics/modi-should-host-farmers-at-home-offer-tea-like-he-did-to-barack-obama-in-2015-says-owaisi/600637/
Articles added:  411
Articles added:  412
Articles added:  413
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/last-laughs-karnataka-election-results-to-tmc-panchayat-poll-violence/58830/
Articles added:  413
Article is not about current topic
Link to article:  https://theprint.in/india/governance/govt-makes-room-for-private-sector-talent-wants-specialists-to-join-ministries-as-joint-secys/68463/
Articles added:  413
Article is not about current topic
Link to article:  https://theprint.in/defence/rashtriya-rifles-battalion-co-among-5-killed-in-encounter-with-terrorists-in-jks-handwara

Articles added:  448
Articles added:  449
Article is not about current topic
Link to article:  https://theprint.in/opinion/soldier-guard-better-smart-fences-night-vision-cameras-control-rooms/35060/
Articles added:  449
Article is not about current topic
Link to article:  https://theprint.in/politics/caa-will-not-be-allowed-in-tamil-nadu-if-dmk-is-voted-to-power-says-stalin/630668/
Articles added:  449
Article is not about current topic
Link to article:  https://theprint.in/talk-point/talk-point-modi-accelerated-1990s-growth/15428/
Articles added:  449
Articles added:  450
Article is not about current topic
Link to article:  https://theprint.in/india/governance/rajya-sabha-passes-3-key-labour-reform-bills-amid-boycott-by-opposition-parties/508939/
Articles added:  450
Articles added:  451
Articles added:  452
Articles added:  453
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-cancels-up-panchayat-poll-plan-amid-covid-surge-wont-hold-ward-level-pub

Article is not about current topic
Link to article:  https://theprint.in/diplomacy/now-boris-johnson-to-realign-uk-foreign-policy-towards-indo-pacific-before-his-india-visit/622546/
Articles added:  491
Articles added:  492
Articles added:  493
Article is not about current topic
Link to article:  https://theprint.in/india/governance/no-withdrawal-of-troops-from-maoist-hit-areas-says-chhattisgarh-cm-baghel/167991/
Articles added:  493
Articles added:  494
Articles added:  495
Articles added:  496
Article is not about current topic
Link to article:  https://theprint.in/politics/dont-teach-us-hindutva-why-no-bharat-ratna-for-savarkar-yet-uddhav-thackeray-to-bjp/615378/
Articles added:  496
Articles added:  497
Article is not about current topic
Link to article:  https://theprint.in/india/mp-police-files-fir-against-tharoor-sardesai-5-other-journalists-for-their-tweets/594742/
Articles added:  497
Articles added:  498
Article is not about current topic
Link to article:  https://theprint.in

Articles added:  526
Article is not about current topic
Link to article:  https://theprint.in/politics/jats-are-the-latest-headache-for-rajasthan-cm-vasundhara-raje/150558/
Articles added:  526
Article is not about current topic
Link to article:  https://theprint.in/opinion/india-becomes-de-facto-hindu-rashtra-bjp-looks-for-new-polarisation-ways/331220/
Articles added:  526
Article is not about current topic
Link to article:  https://theprint.in/economy/these-are-the-market-winners-and-losers-after-modi-wins-a-second-term/240070/
Articles added:  526
Articles added:  527
Article is not about current topic
Link to article:  https://theprint.in/politics/dushyant-chautala-tau-devi-lals-true-heir-and-the-prince-who-fooled-farmers/628337/
Articles added:  527
Article is not about current topic
Link to article:  https://theprint.in/yourturn/reader-view-govt-needs-to-encourage-farmers-to-adopt-other-methods-to-tackle-stubble-burning/535429/
Articles added:  527
Article is not about current to

Articles added:  572
Articles added:  573
Article is not about current topic
Link to article:  https://theprint.in/talk-point/legality-of-demonetisation/15002/
Articles added:  573
Article is not about current topic
Link to article:  https://theprint.in/opinion/terrorise-stubble-burning-problem/14822/
Articles added:  573
Article is not about current topic
Link to article:  https://theprint.in/economy/saudi-oil-shock-will-probably-not-matter-as-much-to-the-world-as-it-could-to-india/292611/
Articles added:  573
Articles added:  574
Articles added:  575
Article is not about current topic
Link to article:  https://theprint.in/politics/fuel-price-hike-jobs-womens-safety-are-the-top-suggestions-for-congress-2019-manifesto/141985/
Articles added:  575
Articles added:  576
Articles added:  577
Article is not about current topic
Link to article:  https://theprint.in/india/dont-vote-for-bjp-rakesh-tikait-tells-mahapanchayats-in-poll-bound-west-bengal/621471/
Articles added:  577
Article is not

Articles added:  608
Article is not about current topic
Link to article:  https://theprint.in/politics/timeline-the-twists-turns-in-the-political-battle-for-karnataka/59794/
Articles added:  608
Articles added:  609
Article is not about current topic
Link to article:  https://theprint.in/india/28-terrorists-killed-in-kashmir-in-april-highest-since-the-scrapping-of-article-370/414228/
Articles added:  609
Articles added:  610
Articles added:  611
Articles added:  612
Articles added:  613
Articles added:  614
Articles added:  615
Articles added:  616
Articles added:  617
Article is not about current topic
Link to article:  https://theprint.in/india/start-free-mass-vaccination-stop-central-vista-project-12-opposition-parties-write-to-modi/657317/
Articles added:  617
Article is not about current topic
Link to article:  https://theprint.in/india/punjab-govt-selling-covid-vaccines-to-private-hospitals-for-profit-alleges-sukhbir-badal/671622/
Articles added:  617
Articles added:  618
Article

Articles added:  666
Articles added:  667
Articles added:  668
Articles added:  669
Article is not about current topic
Link to article:  https://theprint.in/india/they-punctured-her-scooter-waited-how-4-men-executed-horrific-hyderabad-rape-murder/329459/
Articles added:  669
Articles added:  670
Article is not about current topic
Link to article:  https://theprint.in/opinion/death-penalty-for-rape-isnt-justice-it-is-vengeance/53519/
Articles added:  670
Article is not about current topic
Link to article:  https://theprint.in/opinion/modis-budget-lures-foreign-capital-but-has-precious-little-for-indians/358861/
Articles added:  670
Articles added:  671
Article is not about current topic
Link to article:  https://theprint.in/ani-press-releases/bangalore-based-property-consultancy-elegance-enterprises-launches-an-online-marketplace-dedicated-to-farmlands/677869/
Articles added:  671
Article is not about current topic
Link to article:  https://theprint.in/economy/these-are-the-5-key-factor

Articles added:  686
Article is not about current topic
Link to article:  https://theprint.in/theprint-otc/selling-psus-fiscal-deficit-short-term-solution-nobel-laureate-abhijit-banerjee/309377/
Articles added:  686
Article is not about current topic
Link to article:  https://theprint.in/pageturner/excerpt/20-years-later-this-is-why-salman-khan-is-still-hated-by-the-bishnois-of-jodhpur/281349/
Articles added:  686
Article is not about current topic
Link to article:  https://theprint.in/politics/10-reasons-why-indias-economy-is-in-the-doldrums-according-to-p-chidambaram/68627/
Articles added:  686
Article is not about current topic
Link to article:  https://theprint.in/talk-point/talk-point-demonetisation-hasnt-even-attacked-1-per-cent-black-money/14933/
Articles added:  686
Articles added:  687
Articles added:  688
Articles added:  689
Articles added:  690
Article is not about current topic
Link to article:  https://theprint.in/politics/kisan-budget-media-freedom-right-to-healthcare-co

Articles added:  729
Articles added:  730
Article is not about current topic
Link to article:  https://theprint.in/economy/modi-govts-rs-50000-crore-export-stimulus-isnt-a-game-changer/291935/
Articles added:  730
Article is not about current topic
Link to article:  https://theprint.in/talk-point/purulia-killings-rss-bjp-workers-under-threat-in-mamatas-west-bengal-or-is-it-orchestrated-panic/66000/
Articles added:  730
Articles added:  731
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/mallya-competes-with-mamata-for-the-headlines/188037/
Articles added:  731
Article is not about current topic
Link to article:  https://theprint.in/thought-shot/ruchir-sharma-sees-economic-democratic-revival-mihir-sharma-on-3-myths-of-eco-downturn/332397/
Articles added:  731
Articles added:  732
Articles added:  733
Articles added:  734
Articles added:  735
Articles added:  736
Articles added:  737
Articles added:  738
Articles added:  739
Number of articles added:  

In [None]:
#Writing the dataframe to disk as a zip-compressed pickle
theprint_df.to_pickle(path="theprint_dataframe", compression="zip")

In [None]:
#Reloading the dataframe from the zip-compressed pickle file "theprint_dataframe"
#NOTE: unpickling can execute arbitrary code, so only load pickle files written by this notebook
theprint_df = pd.read_pickle("theprint_dataframe", compression="zip")
#Displaying (rows, columns) as a quick check of what was loaded
theprint_df.shape

(745, 6)

In [None]:
#Summary of the links that were not added: distinct links whose topic still needs checking,
#and links that raised exceptions
print(
    "Number of articles whose topic needs to be checked: ",
    pd.Series(topic_other_list_1).nunique(dropna=False),
)
print(
    "Number of links that threw exceptions: ",
    len(exceptions_links_list_1),
)

Number of articles whose topic needs to be checked:  701
Number of links that threw exceptions:  13


In [None]:
#Displaying the links that threw exceptions, for manual inspection
exceptions_links_list_1

['https://theprint.in/plugged-in/primetime/rajdeep-questions-yechury-over-oppn-tactics-mirror-now-examines-andhras-mystery-disease/563965/',
 'https://theprint.in/plugged-in/primetime/aaj-tak-questions-yogis-love-jihad-law-deepak-chaurasia-attacks-pakistan-on-news-nation/568408/',
 'https://theprint.in/plugged-in/primetime/arnab-says-common-people-rejected-bharat-bandh-navika-says-oppn-losing-grip-on-reality/563276/',
 'https://theprint.in/plugged-in/primetime/newsx-zee-news-on-26-11-attacks-ravish-kumar-on-protesting-farmers-left-out-in-the-cold/552817/',
 'https://theprint.in/plugged-in/primetime/singhu-border-shouldnt-become-shaheen-bagh-says-zee-news-ravish-on-khalistanis-at-protests/564645/',
 'https://theprint.in/theprint-profile/china-hand-syl-protest-death-remark-how-haryana-agri-minister-loves-to-target-farmers/606069/',
 'https://theprint.in/plugged-in/primetime/arnab-calls-badals-award-return-hypocritical-ravish-on-langar-in-vigyan-bhawan/557263/',
 'https://theprint.in/plug

Since most of these are articles on prime time news, they can be ignored.

Let's review a few random articles.

In [None]:
#Drawing 10 random articles for a manual review; random_state is fixed so that re-running
#the notebook shows the same rows and the cells that inspect individual sampled rows stay in sync
sample_df = theprint_df.sample(n=10, random_state=42)
sample_df

Unnamed: 0,Title,Link,Date,Authors,Topic,Article
204,"SC panel will continue work, whether farmers t...",https://theprint.in/india/sc-panel-will-contin...,"21 January, 2021 7:51 pm",Sravasti Dasgupta,farm laws,The panel met the first batch of farmers Thurs...
644,BJP disrespecting Constitution by making laws ...,https://theprint.in/politics/bjp-disrespecting...,"29 December, 2020 8:00 pm",PTI,farm laws,The former J&amp;K chief minister also said th...
20,No formal talks with govt until harassment sto...,https://theprint.in/india/no-formal-talks-with...,"2 February, 2021 4:25 pm",PTI,farm laws,The farmer union alleged that increased barric...
360,Haryana’s Chautala village sent hundreds to fa...,https://theprint.in/india/how-haryanas-chautal...,"31 May, 2021 8:00 am",Ananya Bhardwaj,farm laws,Former Haryana chief minister O.P. Chautala's ...
492,Why alarm bells are ringing in Punjab over Dee...,https://theprint.in/india/why-alarm-bells-are-...,"6 March, 2021 5:43 pm",Chitleen K Sethi,farm laws,The two have emerged from the farmers’ agitati...
46,"Trump’s disappointment with Supreme Court, and...",https://theprint.in/last-laughs/trumps-disappo...,"14 January, 2021 6:32 pm",Yimkumla Longkumer,farm laws,"The best cartoons of the day, chosen by the ed..."
3,"Modi’s boat ride got the coverage, but on TV, ...",https://theprint.in/opinion/telescope/modis-bo...,"3 December, 2020 8:26 am",Shailaja Bajpai,farm laws,When a Zee News reporter tried to explain a ga...
471,"Farmer leaders reject Modi govt’s proposal, st...",https://theprint.in/india/farmer-leaders-in-a-...,"9 December, 2020 5:07 pm",Sravasti Dasgupta,farm laws,The central govt sent a draft proposal with am...
628,Cracks appear among farmer unions after R-Day ...,https://theprint.in/india/cracks-appear-among-...,"27 January, 2021 6:45 pm",Moushumi Das Gupta,farm laws,Bharatiya Kisan Union (Bhanu) and Rashtriya Ki...
679,Punjab’s protesting farmers playing into Modi ...,https://theprint.in/politics/punjabs-protestin...,"11 November, 2020 11:12 am",Sravasti Dasgupta,farm laws,The Modi government has invited protesting far...


In [None]:
#Printing the link and then the full text of the article with index label 679, one per line
print(theprint_df['Link'][679], theprint_df['Article'][679], sep="\n")

https://theprint.in/politics/punjabs-protesting-farmers-playing-into-modi-govts-hands-says-state-congress-chief/541831/
The Modi government has invited protesting farmers for talks on 13 November in a bid to end the impasse. But Sunil Jakhar says Centre’s intentions are suspect. Protesting farmers are playing into the hands of the Narendra Modi government by not allowing the passage of passenger trains into the state, Punjab Pradesh Congress Committee President Sunil Jakhar told ThePrint Tuesday.   “While Centre may have invited farmers for talks, the farmers are playing into the hands of the Centre by not allowing passage of passenger trains. Because the Centre is not affected by stoppage of trains in any way and neither do they care about Punjab or its farmers,” said the former Gurdaspur MP. His comments come amid the continued suspension of trains to Punjab by the central government and the ongoing protest by the state’s farmers against the  three contentious farm legislations passe

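**TODO:** Should links from the 'last-laughs' section be deleted? The one that appeared in the sample above is a round-up of the day's cartoons rather than an article on the topic: https://theprint.in/last-laughs/trumps-disappointment-with-supreme-court-and-what-sitharamans-budget-needs/585400/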

#### 2. Rafale

In [None]:
#Reloading the saved dataframe (zip-compressed pickle) at the start of this topic's section
#NOTE: unpickling can execute arbitrary code, so only load pickle files written by this notebook
theprint_df = pd.read_pickle("theprint_dataframe", compression="zip")
#Displaying (rows, columns) as a quick check of what was loaded
theprint_df.shape

(745, 6)

In [None]:
#Setting the topic; the same value is passed to links_unpickle here and to link_to_article further below
topic = 'rafale'

#Unpickling the links for this topic
#links_unpickle is defined outside this cell; running it also prints the number of links found
links_list = links_unpickle(topic)

Number of links on the rafale topic is 1172.


In [None]:
#Removing duplicate links
#dict.fromkeys keeps the first occurrence of each link in its original position; list(set(...))
#returned the links in an order that changes between kernel sessions (string hashes are randomised),
#which made the crawl order and the dataframe row order irreproducible
links_list = list(dict.fromkeys(links_list))

#Dropping links that have the term 'video' in them
links_list = [link for link in links_list if 'video' not in link]
print("Number of links after cleaning: ", len(links_list))

Number of links after cleaning:  1147


Adding these articles to the dataframe.

In [None]:
#Crawling every link for the current topic and collecting the links that were not added to the dataframe
#link_to_article is defined outside this cell; the progress log printed below is produced while it runs
#Judging by how the two results are reported further down, the first list presumably holds links whose
#topic still needs to be checked and the second holds links that threw exceptions - confirm against link_to_article
topic_other_list_2, exceptions_links_list_2 = link_to_article(links_list, topic)

Article is not about current topic
Link to article:  https://theprint.in/talk-point/sc-electoral-bonds-order-some-transparency-or-conveniently-postponing-the-controversy/220738/
Articles added:  0
Article is not about current topic
Link to article:  https://theprint.in/politics/reading-between-the-lines-what-3-losing-bjp-cms-have-been-up-to-on-social-media/163722/
Articles added:  0
Articles added:  1
Article is not about current topic
Link to article:  https://theprint.in/india/chidambaram-slams-bjp-for-their-silence-on-siddiquis-death-soaring-inflation/698491/
Articles added:  1
Articles added:  2
Article is not about current topic
Link to article:  https://theprint.in/opinion/brahmastra/aatmanirbhar-in-defence-cant-be-a-mere-slogan-modi-govt-must-handhold-private-players/462575/
Articles added:  2
Articles added:  3
Article is not about current topic
Link to article:  https://theprint.in/india/punjab-cm-amarinder-singh-is-ready-for-truce-with-navjot-sidhu-if-he-apologises-for-his-at

Articles added:  19
Article is not about current topic
Link to article:  https://theprint.in/talk-point/revoking-pakistans-mfn-status-proportionate-reply-or-modi-exploring-diplomatic-steps-first/193282/
Articles added:  19
Article is not about current topic
Link to article:  https://theprint.in/politics/every-deal-not-bofors-rahul-gandhi-should-apologise-for-false-claims-on-rafale-bjp/163503/
Articles added:  19
Articles added:  20
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/republics-pm-interview-arnabs-questions-invite-attacks-on-opposition-by-modi/214147/
Articles added:  20
Article is not about current topic
Link to article:  https://theprint.in/defence/defence-ministry-approves-purchase-of-33-fighter-aircraft-for-iaf-248-indigenous-missiles/453299/
Articles added:  20
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/indian-tv-news-feels-the-josh-of-airstrikes-pak-media-takes-forest-cover/198165/
Articles ad

Articles added:  40
Articles added:  41
Articles added:  42
Articles added:  43
Articles added:  44
Article is not about current topic
Link to article:  https://theprint.in/opinion/heres-why-you-will-watch-arnab-goswami-navika-kumar-long-after-election-results-are-out/310364/
Articles added:  44
Articles added:  45
Article is not about current topic
Link to article:  https://theprint.in/india/citizenship-amendment-act-is-not-anti-muslim-says-rajnath-singh/336706/
Articles added:  45
Articles added:  46
Articles added:  47
Articles added:  48
Article is not about current topic
Link to article:  https://theprint.in/opinion/india-could-emerge-as-the-global-power-the-world-has-been-waiting-for-after-covid/436814/
Articles added:  48
Article is not about current topic
Link to article:  https://theprint.in/opinion/pharma-firms-and-diagnostics-developers-need-to-unite-they-are-catalysts-for-saving-lives/698641/
Articles added:  48
Article is not about current topic
Link to article:  https://t

Articles added:  67
Article is not about current topic
Link to article:  https://theprint.in/opinion/heres-what-narendra-modi-really-wants-to-say-in-mann-ki-baat-but-cant/169408/
Articles added:  67
Article is not about current topic
Link to article:  https://theprint.in/defence/india-to-stick-to-its-guns-firm-on-russian-s-400-deal-despite-us-pressure-to-back-out/247760/
Articles added:  67
Article is not about current topic
Link to article:  https://theprint.in/opinion/brahmastra/india-can-learn-from-azerbaijan-how-to-spend-smart-on-military-when-budget-is-thin/620248/
Articles added:  67
Article is not about current topic
Link to article:  https://theprint.in/politics/amit-shah-siddaramaiah-govt-karnataka-divide-hindus/45056/
Articles added:  67
Article is not about current topic
Link to article:  https://theprint.in/defence/masood-azhars-jaish-e-mohammad-is-a-potent-force-in-kashmir/193051/
Articles added:  67
Article is not about current topic
Link to article:  https://theprint.in/

Article is not about current topic
Link to article:  https://theprint.in/economy/why-sri-lankas-economy-faces-a-painful-reset-in-the-coming-months/437591/
Articles added:  99
Articles added:  100
Article is not about current topic
Link to article:  https://theprint.in/politics/no-one-can-counter-pm-modi-for-next-25-years-shiv-sena/239666/
Articles added:  100
Article is not about current topic
Link to article:  https://theprint.in/india/governance/rs-13-crore-gst-fraud-busted-after-up-cops-sales-tax-depts-joint-raid-in-lucknow-noida/221323/
Articles added:  100
Articles added:  101
Article is not about current topic
Link to article:  https://theprint.in/politics/left-out-of-up-alliance-congress-gives-up-plan-to-include-bsp-in-mp-cabinet/178295/
Articles added:  101
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/celebrating-section-377-verdict-and-madhya-pradesh-congress-love-for-cows/112787/
Articles added:  101
Articles added:  102
Articles added:

Articles added:  134
Article is not about current topic
Link to article:  https://theprint.in/opinion/chakraview/the-desi-lca-is-defence-ministrys-low-hanging-fruit-and-it-needs-to-be-plucked-now/26688/
Articles added:  134
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-faces-age-old-poll-quandary-whether-or-not-to-field-veterans-advani-joshi/206146/
Articles added:  134
Article is not about current topic
Link to article:  https://theprint.in/politics/nitin-gadkari-only-bjp-minister-with-guts-says-rahul-gandhi/187715/
Articles added:  134
Articles added:  135
Article is not about current topic
Link to article:  https://theprint.in/judiciary/ayodhya-matter-could-have-been-heard-faster-if-we-had-the-technology-back-then-cji-bobde/422694/
Articles added:  135
Articles added:  136
Article is not about current topic
Link to article:  https://theprint.in/india/governance/modis-visit-helps-india-sweden-bury-the-bofors-ghost-focus-on-defence-security-ties

Articles added:  152
Article is not about current topic
Link to article:  https://theprint.in/defence/russia-wont-sell-war-equipment-to-pakistan-ties-limited-to-fighting-terror/233757/
Articles added:  152
Articles added:  153
Articles added:  154
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/modi-writes-to-imran-khan-says-lets-talk/101851/
Articles added:  154
Articles added:  155
Article is not about current topic
Link to article:  https://theprint.in/opinion/indias-out-shopping-for-arms-with-empty-pockets-cant-afford-more-men-modern-machines/25582/
Articles added:  155
Article is not about current topic
Link to article:  https://theprint.in/defence/iaf-to-buy-83-more-tejas-fighters-from-hal-instead-of-foreign-jets-cds-rawat-says/421827/
Articles added:  155
Articles added:  156
Article is not about current topic
Link to article:  https://theprint.in/politics/in-shimlas-iaf-vs-army-fight-nationalism-balakot-battle-jobs-highways/232727/
Articles a

Articles added:  173
Articles added:  174
Article is not about current topic
Link to article:  https://theprint.in/india/former-cji-ranjan-gogoi-takes-oath-as-rajya-sabha-member-amid-shame-chants/383657/
Articles added:  174
Article is not about current topic
Link to article:  https://theprint.in/defence/bjp-leaders-vice-presidents-office-send-out-air-force-day-greetings-with-non-indian-jets/131213/
Articles added:  174
Article is not about current topic
Link to article:  https://theprint.in/opinion/modi-monitor/after-indira-gandhi-narendra-modi-only-pm-to-display-enthusiasm-about-sports/109332/
Articles added:  174
Articles added:  175
Articles added:  176
Article is not about current topic
Link to article:  https://theprint.in/india/38-iaf-aircraft-armys-t-90-tanks-17-state-tableaux-feature-in-republic-day-parade/592271/
Articles added:  176
Article is not about current topic
Link to article:  https://theprint.in/opinion/telescope/a-new-era-in-tv-rahul-gandhi-a-rising-star-and-arnab-

Articles added:  194
Article is not about current topic
Link to article:  https://theprint.in/defence/boeing-plans-trials-of-its-super-hornet-jets-on-ski-jump-ramp-to-meet-indian-requirements/360878/
Articles added:  194
Article is not about current topic
Link to article:  https://theprint.in/defence/will-order-for-83-tejas-soon-hal-to-deliver-70-aircraft-by-2026-iaf-chief-bhadauria/432320/
Articles added:  194
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/india-fobs-off-trump-toi-mint-warns-of-deepest-recession-ambani-stocks-rise-in-bs/431560/
Articles added:  194
Article is not about current topic
Link to article:  https://theprint.in/politics/haryana-congress-shake-up-has-sonias-stamp-all-over-and-a-peek-at-where-rahul-failed/287131/
Articles added:  194
Articles added:  195
Articles added:  196
Article is not about current topic
Link to article:  https://theprint.in/health/13-states-reply-to-modi-govt-query-on-oxygen-deaths-only-1-reports-suspe

Articles added:  210
Article is not about current topic
Link to article:  https://theprint.in/50-word-edit/statistics-panel-resignations-only-reinforce-doubts-over-indias-official-economic-data/185291/
Articles added:  210
Articles added:  211
Article is not about current topic
Link to article:  https://theprint.in/politics/what-karnataka-bjp-leaders-plan-to-do-to-stop-bengaluru-air-show-from-moving-to-yogis-up/97828/
Articles added:  211
Articles added:  212
Articles added:  213
Article is not about current topic
Link to article:  https://theprint.in/india/education/only-9-states-and-uts-have-re-opened-or-want-to-reopen-schools-govt-tells-parliament/711958/
Articles added:  213
Article is not about current topic
Link to article:  https://theprint.in/politics/indias-challenges-have-become-worse-under-pm-modi-says-pew-survey/212340/
Articles added:  213
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/farm-growth-slows-but-pride-surges-and-pm-modi-mix

Articles added:  233
Article is not about current topic
Link to article:  https://theprint.in/politics/rahul-gandhi-to-return-tata-land-to-bastar-tribals-in-bid-to-project-pro-poor-image/192916/
Articles added:  233
Articles added:  234
Articles added:  235
Article is not about current topic
Link to article:  https://theprint.in/economy/companies-in-covid-19-beauty-brands-making-sanitisers-biggest-soap-company-giving-free-soap/389082/
Articles added:  235
Article is not about current topic
Link to article:  https://theprint.in/india/governance/robert-vadra-moves-delhi-high-court-seeking-to-drop-money-laundering-case-against-him/208650/
Articles added:  235
Article is not about current topic
Link to article:  https://theprint.in/politics/vendetta-vs-mandate-ta-heres-how-modi-can-reclaim-his-anti-corruption-agenda-post-pnb-scam/41213/
Articles added:  235
Article is not about current topic
Link to article:  https://theprint.in/opinion/waah-modiji-waah-left-liberals-like-kunal-kamra-only-

Articles added:  250
Articles added:  251
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/from-soft-and-hard-laddoos-to-50-years-of-power-rss-bjp-dominates-headlines/114977/
Articles added:  251
Article is not about current topic
Link to article:  https://theprint.in/national-interest/now-that-modi-has-protected-india-voters-are-back-to-basics-jobs-income-farm-prices/311634/
Articles added:  251
Articles added:  252
Article is not about current topic
Link to article:  https://theprint.in/politics/these-are-indias-34-most-powerful-political-families/202724/
Articles added:  252
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-has-become-a-party-of-the-dead-says-yashwant-sinha/162847/
Articles added:  252
Articles added:  253
Article is not about current topic
Link to article:  https://theprint.in/judiciary/chauffeur-driven-car-secretary-peons-what-gauhati-hc-will-give-cji-gogoi-when-he-retires/315656/
Articles adde

Articles added:  266
Articles added:  267
Article is not about current topic
Link to article:  https://theprint.in/opinion/telescope/news-channels-have-decided-to-put-rahul-gandhi-and-congress-on-trial-before-lok-sabha-polls/205353/
Articles added:  267
Articles added:  268
Articles added:  269
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/narendra-modi-peeps-into-tmcs-stable-as-india-awaits-voters-majestic-footprint/229580/
Articles added:  269
Article is not about current topic
Link to article:  https://theprint.in/opinion/under-modi-regime-the-election-commission-needs-the-spine-of-former-cec-t-n-seshan/215026/
Articles added:  269
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-leaders-follow-modi-and-become-chowkidars-only-on-twitter/207077/
Articles added:  269
Article is not about current topic
Link to article:  https://theprint.in/best-of-theprint-icymi/why-supreme-courts-verdict-decriminalising-homose

Articles added:  288
Article is not about current topic
Link to article:  https://theprint.in/pre-truth/congress-has-found-out-who-politicised-pakistan-airstrikes-the-bjp/203088/
Articles added:  288
Articles added:  289
Article is not about current topic
Link to article:  https://theprint.in/opinion/bjp-had-5-trump-cards-before-this-election-but-thats-set-to-change-now/162176/
Articles added:  289
Article is not about current topic
Link to article:  https://theprint.in/india/anil-ambani-cbi-ex-chief-alok-verma-top-cop-rakesh-asthana-on-latest-pegasus-snoop-list/701212/
Articles added:  289
Article is not about current topic
Link to article:  https://theprint.in/defence/india-china-corps-commanders-to-meet-in-ladakh-tomorrow-but-major-breakthrough-unlikely/451163/
Articles added:  289
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/obc-quota-draws-media-attention-bulandshahr-and-michel-remain-on-page-1/159994/
Articles added:  289
Article is not abou

Articles added:  306
Articles added:  307
Article is not about current topic
Link to article:  https://theprint.in/india/governance/ahead-of-putin-visit-gifted-students-from-russia-india-collaborate-on-something-special/129270/
Articles added:  307
Articles added:  308
Article is not about current topic
Link to article:  https://theprint.in/politics/calling-for-discipline-these-days-is-branded-autocratic-modi/110355/
Articles added:  308
Article is not about current topic
Link to article:  https://theprint.in/defence/mechanical-failure-could-be-behind-mirage-2000-accident-death-of-2-pilots/187026/
Articles added:  308
Articles added:  309
Articles added:  310
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/staying-afloat-with-quota-and-a-revolving-cbi-door/176872/
Articles added:  310
Article is not about current topic
Link to article:  https://theprint.in/india/full-text-of-president-kovind-speech-new-india-wants-uninterrupted-accelerated-growth/25

Articles added:  330
Article is not about current topic
Link to article:  https://theprint.in/world/casting-couch-was-just-a-seat-with-benefits-for-all-harvey-weinsteins-lawyers-argue/356404/
Articles added:  330
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/the-lathi-rule-a-gandhi-mukt-congress-and-jammu-and-kashmirs-version-of-democracy/343011/
Articles added:  330
Articles added:  331
Articles added:  332
Article is not about current topic
Link to article:  https://theprint.in/defence/we-hit-target-dont-count-casualties-iaf-chief-on-pakistan-strike/200958/
Articles added:  332
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/election-commission-unsure-but-amit-shah-bats-for-simultaneous-polls/98103/
Articles added:  332
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/a-group-of-ministers-to-tackle-lynching-and-peaceful-protest-a-fundamental-right/87334/
Articles added:  332


Articles added:  350
Article is not about current topic
Link to article:  https://theprint.in/politics/rahul-gandhi-rahul-gandhi-rahul-gandhi-5-reasons-congress-sank-in-delhi/363149/
Articles added:  350
Article is not about current topic
Link to article:  https://theprint.in/politics/the-modest-gunny-bag-might-turn-out-to-be-punjabs-biggest-election-issue/232358/
Articles added:  350
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/abhijit-banerjee-ironic-nobel-win-ravi-shankar-prasad-trashed-statement/306113/
Articles added:  350
Article is not about current topic
Link to article:  https://theprint.in/opinion/modi-monitor/how-modi-speaks-a-different-language-with-pakistan-than-what-vajpayee-did/193618/
Articles added:  350
Articles added:  351
Article is not about current topic
Link to article:  https://theprint.in/opinion/in-2019-modis-novelty-factor-is-gone-from-the-persona-and-the-oratory/190535/
Articles added:  351
Articles added:  352
Article

Articles added:  369
Article is not about current topic
Link to article:  https://theprint.in/opinion/4-reasons-amit-shah-has-been-so-generous-to-nitish-kumar-in-seat-sharing-in-bihar/141145/
Articles added:  369
Articles added:  370
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/rafale-leaks-from-france-and-indias-evm-carriers/635019/
Articles added:  370
Article is not about current topic
Link to article:  https://theprint.in/opinion/the-factivist/the-mess-in-indias-higher-judiciary-is-sadly-of-its-own-making/229978/
Articles added:  370
Article is not about current topic
Link to article:  https://theprint.in/opinion/like-in-all-bad-marriages-shiv-sena-and-bjp-keep-giving-alliance-a-last-shot/197433/
Articles added:  370
Articles added:  371
Articles added:  372
Articles added:  373
Articles added:  374
Article is not about current topic
Link to article:  https://theprint.in/talk-point/citizenship-law-protests-is-the-oppositions-political-respons

Articles added:  383
Article is not about current topic
Link to article:  https://theprint.in/defence/iaf-looking-at-leasing-midair-refuellers-as-efforts-to-purchase-6-go-on-says-bhadauria/517377/
Articles added:  383
Article is not about current topic
Link to article:  https://theprint.in/world/north-korea-locks-down-city-following-report-on-the-first-suspected-case-of-covid/468404/
Articles added:  383
Article is not about current topic
Link to article:  https://theprint.in/theprint-essential/ex-cjis-iaf-officer-brother-anjan-gogoi-too-has-post-retirement-job-secretary-level-post/383482/
Articles added:  383
Article is not about current topic
Link to article:  https://theprint.in/politics/bsy-diary-forged-says-jaitley-accuses-media-of-playing-along-with-opposition/210665/
Articles added:  383
Articles added:  384
Articles added:  385
Article is not about current topic
Link to article:  https://theprint.in/defence/lockheed-martin-says-it-wont-sell-f-21-jets-to-any-other-country-if-iaf

In [None]:
#Writing the dataframe to disk as a zip-compressed pickle, then showing its (rows, columns)
theprint_df.to_pickle(path="theprint_dataframe", compression="zip")
theprint_df.shape

(1133, 6)

In [None]:
#Summary of the links that were not added: distinct links whose topic still needs checking,
#and links that raised exceptions
print(
    "Number of articles whose topic needs to be checked: ",
    pd.Series(topic_other_list_2).nunique(dropna=False),
)
print(
    "Number of links that threw exceptions: ",
    len(exceptions_links_list_2),
)

Number of articles whose topic needs to be checked:  755
Number of links that threw exceptions:  4


#### 3. Article 370

In [None]:
#Setting the topic; the same value is passed to links_unpickle here and to link_to_article further below
topic = 'article 370'

#Unpickling the links for this topic
#links_unpickle is defined outside this cell; running it also prints the number of links found
links_list = links_unpickle(topic)

Number of links on the article 370 topic is 2346.


In [None]:
#Removing duplicate links
#dict.fromkeys keeps the first occurrence of each link in its original position; list(set(...))
#returned the links in an order that changes between kernel sessions (string hashes are randomised),
#which made the crawl order and the dataframe row order irreproducible
links_list = list(dict.fromkeys(links_list))

#Dropping links that have the term 'video' in them
links_list = [link for link in links_list if 'video' not in link]
print("Number of links after cleaning: ", len(links_list))

Number of links after cleaning:  2315


Adding these articles to the dataframe.

In [None]:
#Crawling every cleaned link for the current topic with link_to_article (defined earlier in the notebook).
#Judging by the log it prints, on-topic articles are added to the dataframe as it goes.
#The two returned lists hold the links that were NOT added -- presumably the links whose topic did not
#match and the links that raised exceptions (TODO confirm against link_to_article).
topic_other_list_3, exceptions_links_list_3 = link_to_article(links_list, topic)

Articles added:  1
Articles added:  2
Articles added:  3
Articles added:  4
Articles added:  5
Article is not about current topic
Link to article:  https://theprint.in/politics/why-bjp-ally-nitish-kumar-is-silent-on-ram-mandir-bhoomi-pujan/476027/
Articles added:  5
Article is not about current topic
Link to article:  https://theprint.in/opinion/being-indian/win-some-lose-some-2019-for-bjp-was-modi-made-but-caa-protests-turned-up-a-surprise/342401/
Articles added:  5
Articles added:  6
Article is not about current topic
Link to article:  https://theprint.in/ani-press-releases/newj-amps-up-original-content-launches-weekly-talk-show-featuring-celebrated-indians-in-various-fields/705430/
Articles added:  6
Articles added:  7
Article is not about current topic
Link to article:  https://theprint.in/opinion/solution-to-pakistan-nepal-maps-india-pressing-reset-button-on-regional-blocs/476907/
Articles added:  7
Articles added:  8
Articles added:  9
Articles added:  10
Articles added:  11
Arti

Article is not about current topic
Link to article:  https://theprint.in/opinion/for-wealth-chasers-liberation-theology-is-marxist-for-others-its-justice-and-freedom/699101/
Articles added:  54
Article is not about current topic
Link to article:  https://theprint.in/india/ed-books-sharad-pawar-ajit-pawar-under-pmla-in-maharashtra-cooperative-bank-scam-case/296312/
Articles added:  54
Article is not about current topic
Link to article:  https://theprint.in/india/how-pakistani-lt-col-nisar-ahmed-won-over-indian-peers-after-stalling-their-advance-in-1965/275748/
Articles added:  54
Article is not about current topic
Link to article:  https://theprint.in/trawling-twitter/modi-rahul-smriti-sharad-yadav-grieve-side-by-side-for-one-of-a-kind-sushma-swaraj/273704/
Articles added:  54
Articles added:  55
Articles added:  56
Articles added:  57
Articles added:  58
Article is not about current topic
Link to article:  https://theprint.in/opinion/not-all-muslim-nations-love-modi-this-is-why-iran-ma

Article is not about current topic
Link to article:  https://theprint.in/opinion/letter-from-pakistan/imran-khan-slamming-modi-govts-cab-exposes-his-memory-loss/333920/
Articles added:  113
Articles added:  114
Article is not about current topic
Link to article:  https://theprint.in/india/on-caa-protests-in-up-modi-says-vandals-should-know-rights-and-duties-go-hand-in-hand/340732/
Articles added:  114
Articles added:  115
Article is not about current topic
Link to article:  https://theprint.in/india/nrc-must-for-national-security-implemented-in-bengal-amit-shah/299687/
Articles added:  115
Articles added:  116
Articles added:  117
Articles added:  118
Articles added:  119
Articles added:  120
Articles added:  121
Articles added:  122
Articles added:  123
Articles added:  124
Article is not about current topic
Link to article:  https://theprint.in/politics/goodbye-ghulam-nabi-azad-why-congress-stayed-silent-when-modi-cried-opponents-praised-him/602159/
Articles added:  124
Article is no

Articles added:  152
Articles added:  153
Articles added:  154
Article is not about current topic
Link to article:  https://theprint.in/defence/confident-of-getting-a-good-grant-from-modi-govts-budget-says-army-chief-naravane/355713/
Articles added:  154
Article is not about current topic
Link to article:  https://theprint.in/india/session-with-ips-officer-rema-rajeshwari-truly-engrossing/314314/
Articles added:  154
Article is not about current topic
Link to article:  https://theprint.in/politics/5-reasons-why-jharkhand-assembly-election-results-are-crucial-for-bjp/338696/
Articles added:  154
Article is not about current topic
Link to article:  https://theprint.in/politics/lockdown-on-ram-mandir-bhoomi-pujan-day-but-not-eid-bjp-says-mamata-govt-punishing-hindus/472102/
Articles added:  154
Articles added:  155
Article is not about current topic
Link to article:  https://theprint.in/theprint-profile/up-bjp-president-behind-modi-china-war-remark-was-once-a-congress-singh/532351/
Articl

Articles added:  197
Articles added:  198
Article is not about current topic
Link to article:  https://theprint.in/politics/haryanas-jats-love-modi-voters-state-equations-in-mind/308723/
Articles added:  198
Articles added:  199
Articles added:  200
Link is already present in the dataframe
Link to article:  https://theprint.in/india/article-370-farm-laws-nep-modi-govt-plans-booklet-to-showcase-major-reforms-since-2014/575116/
Articles added:  200
Articles added:  201
Articles added:  202
Article is not about current topic
Link to article:  https://theprint.in/diplomacy/in-a-first-indias-dr-reddys-labs-to-supply-a-generic-drug-to-chinas-public-hospitals/298035/
Articles added:  202
Articles added:  203
Articles added:  204
Article is not about current topic
Link to article:  https://theprint.in/opinion/maharashtra-haryana-send-a-message-against-bjp-arrogance-of-power/310822/
Articles added:  204
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/a-tale-

Article is not about current topic
Link to article:  https://theprint.in/opinion/politically-correct/with-caa-kashmir-nrc-amit-shah-has-come-out-of-pm-modi-shadow/339197/
Articles added:  245
Articles added:  246
Articles added:  247
Article is not about current topic
Link to article:  https://theprint.in/india/bhagwats-muslim-population-remark-isnt-surprising-rss-view-on-demography-has-evolved/702852/
Articles added:  247
Articles added:  248
Articles added:  249
Article is not about current topic
Link to article:  https://theprint.in/india/ram-dharm-victory-for-ram-how-regional-papers-covered-ayodhya-bhoomi-pujan/476245/
Articles added:  249
Article is not about current topic
Link to article:  https://theprint.in/politics/at-bjp-hq-modi-is-sankatmochak-true-friend-sensitive-writer-poet-brave-child-and-more/293656/
Articles added:  249
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/headlines-catch-up-with-ever-growing-rift-between-the-rbi-and-modi-

Articles added:  283
Article is not about current topic
Link to article:  https://theprint.in/politics/message-from-delhi-for-adityanath-bjp-set-to-lose-11-of-13-seats-where-he-campaigned/363658/
Articles added:  283
Articles added:  284
Articles added:  285
Articles added:  286
Articles added:  287
Articles added:  288
Articles added:  289
Article is not about current topic
Link to article:  https://theprint.in/diplomacy/eliminating-terrorism-is-a-precondition-for-cooperation-survival-says-jaishankar/297531/
Articles added:  289
Article is not about current topic
Articles added:  289
Articles added:  290
Article is not about current topic
Link to article:  https://theprint.in/politics/sanjay-raut-praful-patel-tr-baalu-men-wielding-power-in-delhi-for-their-regional-bosses/341660/
Articles added:  290
Article is not about current topic
Link to article:  https://theprint.in/thought-shot/katju-on-how-uks-anti-india-karkaria-on-universal-identity-crisis-muzzafar-ali-on-khayyam/280540/
Arti

Article is not about current topic
Link to article:  https://theprint.in/diplomacy/modi-to-address-un-on-27-september-will-steer-clear-of-internal-issue-kashmir/284072/
Articles added:  327
Articles added:  328
Article is not about current topic
Link to article:  https://theprint.in/india/delhis-deshbhakti-lessons-to-cover-kargil-martyrs-saurabh-kalia-vijayant-thapar-sisodia/299102/
Articles added:  328
Articles added:  329
Article is not about current topic
Link to article:  https://theprint.in/india/governance/govts-new-dth-rules-silent-on-which-namo-tv-like-entities-can-be-run-on-platform-channels/581002/
Articles added:  329
Article is not about current topic
Link to article:  https://theprint.in/opinion/politricks/modi-shah-think-they-dont-need-allies-like-sukhbir-badal-but-they-do/512489/
Articles added:  329
Articles added:  330
Article is not about current topic
Link to article:  https://theprint.in/opinion/army-chief-naravane-must-hold-back-on-china-border-issue-till-tibet-is-

Articles added:  371
Articles added:  372
Articles added:  373
Article is not about current topic
Link to article:  https://theprint.in/thought-shot/bjp-must-rethink-poll-strategy-says-verniers-new-data-law-threatens-users-says-apar-gupta/341474/
Articles added:  373
Articles added:  374
Articles added:  375
Articles added:  376
Articles added:  377
Articles added:  378
Articles added:  379
Article is not about current topic
Link to article:  https://theprint.in/opinion/letter-from-pakistan/imran-khan-lectures-modi-on-minorities-he-must-ban-forced-conversions-in-pakistan-first/290419/
Articles added:  379
Article is not about current topic
Link to article:  https://theprint.in/in-pictures/in-photos-bjp-launches-lok-sabha-elections-manifesto-promises-a-new-india-by-2022/218663/
Articles added:  379
Article is not about current topic
Link to article:  https://theprint.in/opinion/modi-monitor/modi-shah-discrediting-shaheen-bagh-not-just-for-delhi-polls-but-also-trump-visit/359337/
Article

Articles added:  432
Article is not about current topic
Link to article:  https://theprint.in/politics/yediyurappa-still-without-a-cabinet-as-shah-modi-look-to-send-karnataka-cm-a-message/274167/
Articles added:  432
Article is not about current topic
Link to article:  https://theprint.in/india/not-wearing-bangles-amarinder-warns-pakistan-against-misusing-kartarpur-corridor/318503/
Articles added:  432
Articles added:  433
Article is not about current topic
Link to article:  https://theprint.in/india/the-global-wildfire-of-street-protests-has-finally-reached-india/337515/
Articles added:  433
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/media-joins-ed-and-cbi-on-hunt-for-missing-p-chidambaram-hindi-news-pursues-pakistan/279927/
Articles added:  433
Articles added:  434
Articles added:  435
Articles added:  436
Article is not about current topic
Link to article:  https://theprint.in/diplomacy/pakistan-delegation-arrives-in-india-for-talks-on-indus-

Articles added:  482
Articles added:  483
Articles added:  484
Articles added:  485
Articles added:  486
Articles added:  487
Article is not about current topic
Link to article:  https://theprint.in/national-interest/mohan-bhagwat-swadeshinomics-power-stop-modi-reform-economic-crisis/304662/
Articles added:  487
Articles added:  488
Article is not about current topic
Link to article:  https://theprint.in/diplomacy/5-key-highlights-of-indias-foreign-policy-that-jaishankar-amplified-in-us/302384/
Articles added:  488
Articles added:  489
Article is not about current topic
Link to article:  https://theprint.in/opinion/rahul-gandhi-may-be-her-role-model-in-life-but-priyanka-emulates-digvijaya-singh-politics/344999/
Articles added:  489
Articles added:  490
Article is not about current topic
Link to article:  https://theprint.in/india/amnesty-international-india-to-challenge-govt-order-freezing-its-accounts-in-karnataka-hc/512880/
Articles added:  490
Article is not about current topic
Link

Articles added:  539
Article is not about current topic
Link to article:  https://theprint.in/opinion/charges-quashed-against-twitters-jack-dorsey-for-holding-poster-more-ceos-must-speak-up-now/404919/
Articles added:  539
Articles added:  540
Article is not about current topic
Link to article:  https://theprint.in/science/modis-acupressure-work-some-mostly-pseudoscience/305593/
Articles added:  540
Articles added:  541
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/trump-sees-filthy-india-and-kashmir-making-headlines-in-bihar/530130/
Articles added:  541
Articles added:  542
Articles added:  543
Articles added:  544
Article is not about current topic
Link to article:  https://theprint.in/diplomacy/modi-shames-pakistan-in-trump-presence-says-its-time-for-decisive-battle-against-terror/295339/
Articles added:  544
Articles added:  545
Articles added:  546
Articles added:  547
Articles added:  548
Articles added:  549
Articles added:  550
Articles ad

Articles added:  583
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/primetime/india-tvs-horror-tale-from-delhis-lnjp-hospital-when-navika-kumar-lost-it-on-times-now/439473/
Articles added:  583
Article is not about current topic
Link to article:  https://theprint.in/trawling-twitter/kumaraswamys-flu-singhvi-looks-for-a-new-congress-president-kiran-bedi-remembers/274884/
Articles added:  583
Articles added:  584
Article is not about current topic
Link to article:  https://theprint.in/world/xi-makes-first-official-visit-to-lhasa-reminds-tibetans-about-following-communist-party/701337/
Articles added:  584
Article is not about current topic
Link to article:  https://theprint.in/opinion/all-parties-promise-achhe-din-bjp-3-ms-make-it-stand-apart/466047/
Articles added:  584
Articles added:  585
Articles added:  586
Article is not about current topic
Link to article:  https://theprint.in/opinion/lautan-ram-nishad-can-offer-a-counter-to-bjps-hindutva-if-on

Article is not about current topic
Link to article:  https://theprint.in/opinion/silence-speaks-louder-than-words-what-rss-chief-mohan-bhagwat-didnt-mention/123551/
Articles added:  610
Article is not about current topic
Link to article:  https://theprint.in/opinion/hemant-soren-could-teach-congress-a-thing-or-two-about-fighting-narendra-modi/340115/
Articles added:  610
Article is not about current topic
Link to article:  https://theprint.in/thought-shot/sanjaya-baru-on-derailed-rising-india-arvind-panagriya-says-dont-fear-china-in-rcep/321482/
Articles added:  610
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/inflation-knocking-on-the-door-criticise-rahul-to-get-leave-and-batsman-satya-nadella/350504/
Articles added:  610
Article is not about current topic
Link to article:  https://theprint.in/judiciary/bhushan-apologises-for-harley-tweet-featuring-cji-bobde-but-stands-by-rest-of-his-posts/473572/
Articles added:  610
Articles added:  611
Articl

Article is not about current topic
Link to article:  https://theprint.in/talk-point/lack-of-strong-opposition-maharashtra-haryana-elections-no-contest/309060/
Articles added:  642
Article is not about current topic
Link to article:  https://theprint.in/diplomacy/modi-xi-meet-in-tamil-nadu-11-12-october-new-confidence-building-measures-priority/303151/
Articles added:  642
Article is not about current topic
Link to article:  https://theprint.in/opinion/citizenship-article-370-sc-st-act-muslims-kashmiris-dalits-protest-in-silos-in-india/336963/
Articles added:  642
Articles added:  643
Articles added:  644
Articles added:  645
Articles added:  646
Articles added:  647
Article is not about current topic
Link to article:  https://theprint.in/opinion/politically-correct/three-reasons-sonia-gandhi-should-make-one-last-sacrifice-congress/305243/
Articles added:  647
Article is not about current topic
Link to article:  https://theprint.in/trawling-twitter/priyanka-chaturvedi-trolls-amruta-fadn

Articles added:  689
Articles added:  690
Articles added:  691
Articles added:  692
Article is not about current topic
Link to article:  https://theprint.in/talk-point/bjp-loses-majority-in-jharkhand-are-modi-shah-no-longer-decisive-factors-in-state-polls/339584/
Articles added:  692
Article is not about current topic
Link to article:  https://theprint.in/india/refusal-to-accept-law-passed-by-legislature-is-a-path-to-anarchy-modi-on-caa-protests/360821/
Articles added:  692
Articles added:  693
Articles added:  694
Articles added:  695
Article is not about current topic
Link to article:  https://theprint.in/diplomacy/india-says-british-mp-debbie-abrahams-saw-ignored-visa-cancellation-email-and-landed-in-delhi/367424/
Articles added:  695
Ran into a problem
<class 'IndexError'> Line: 16
Error object:  list index out of range
Article is not about current topic
Link to article:  https://theprint.in/opinion/by-returning-dubai-princess-latifa-indias-hard-nosed-realism-paid-off/378161/
Artic

Articles added:  741
Articles added:  742
Article is not about current topic
Link to article:  https://theprint.in/opinion/dont-laud-up-police-commissionerates-yet-you-cant-be-judge-jury-executioner-ias-officer/467288/
Articles added:  742
Articles added:  743
Articles added:  744
Article is not about current topic
Link to article:  https://theprint.in/politics/caa-will-not-be-allowed-in-tamil-nadu-if-dmk-is-voted-to-power-says-stalin/630668/
Articles added:  744
Article is not about current topic
Link to article:  https://theprint.in/science/these-green-crackers-will-reduce-pollution-risk-on-diwali-without-dimming-the-fun/143274/
Articles added:  744
Articles added:  745
Article is not about current topic
Link to article:  https://theprint.in/politics/rahul-gandhi-the-only-alternative-to-narendra-modi-says-rajasthan-cm-ashok-gehlot/333701/
Articles added:  745
Link is already present in the dataframe
Link to article:  https://theprint.in/plugged-in/rahul-gandhi-should-give-rafale-scam

Articles added:  788
Articles added:  789
Articles added:  790
Articles added:  791
Article is not about current topic
Link to article:  https://theprint.in/judiciary/7-11-blasts-14-yrs-on-convicts-still-blame-indian-mujahideen-rely-on-police-documents/578461/
Articles added:  791
Article is not about current topic
Link to article:  https://theprint.in/opinion/indian-liberals-no-strategy-to-counter-rss-hindutva-constitutionalism/545766/
Articles added:  791
Articles added:  792
Article is not about current topic
Link to article:  https://theprint.in/politics/why-modi-has-reasons-to-be-anxious-as-he-prepares-for-a-second-term-in-office/239248/
Articles added:  792
Articles added:  793
Articles added:  794
Articles added:  795
Article is not about current topic
Link to article:  https://theprint.in/opinion/jammu-and-kashmirs-biannual-darbar-move-is-bleeding-india-must-stop-now/438842/
Articles added:  795
Article is not about current topic
Link to article:  https://theprint.in/india/nsa-

Articles added:  848
Articles added:  849
Articles added:  850
Articles added:  851
Articles added:  852
Articles added:  853
Article is not about current topic
Link to article:  https://theprint.in/opinion/bjp-wins-states-by-overriding-local-issues-with-ideological-bihar-is-latest-example/543062/
Articles added:  853
Article is not about current topic
Link to article:  https://theprint.in/opinion/why-the-lac-clash-with-china-wont-upset-modis-political-math/450173/
Articles added:  853
Article is not about current topic
Link to article:  https://theprint.in/diplomacy/no-interest-in-meeting-pramila-jayapal-jaishankar-says-after-cancelling-meet-with-us-panel/338191/
Articles added:  853
Article is not about current topic
Link to article:  https://theprint.in/politics/on-his-birthday-pm-modi-says-decision-to-abrogate-article-370-inspired-by-sardar-patel/292757/
Articles added:  853
Article is not about current topic
Link to article:  https://theprint.in/world/pakistan-marks-kashmirhour-wi

Article is not about current topic
Link to article:  https://theprint.in/yourturn/reader-view-ram-temple-rafale-jets-done-modi-should-focus-i-day-speech-on-health-economy/478657/
Articles added:  898
Article is not about current topic
Link to article:  https://theprint.in/india/3-photojournalists-win-pulitzer-for-kashmir-coverage-heres-list-of-indians-who-won-in-past/415004/
Articles added:  898
Article is not about current topic
Link to article:  https://theprint.in/politics/inside-story-of-how-rahul-gandhi-sonia-gandhi-forced-jyotiraditya-scindia-to-dump-congress/378558/
Articles added:  898
Articles added:  899
Article is not about current topic
Link to article:  https://theprint.in/politics/rss-sees-larger-design-in-delhi-communal-riots-blames-changing-demographic-profile/372552/
Articles added:  899
Article is not about current topic
Link to article:  https://theprint.in/defence/modi-gives-india-cds-its-biggest-defence-reform-what-it-means-how-itll-work/277325/
Articles added:  89

Articles added:  955
Articles added:  956
Articles added:  957
Article is not about current topic
Link to article:  https://theprint.in/india/aamir-khan-meets-turkeys-first-lady-in-istanbul-to-film-laal-singh-chaddha-there-from-oct/483065/
Articles added:  957
Link is already present in the dataframe
Link to article:  https://theprint.in/plugged-in/primetime/farmer-protest-misdirected-demands-not-linked-to-laws-yogi-on-news18-nirmala-on-times-now/566175/
Articles added:  957
Articles added:  958
Article is not about current topic
Link to article:  https://theprint.in/world/us-panel-chairperson-was-unaware-of-jayapals-name-on-list-of-delegates-meeting-jaishankar/339488/
Articles added:  958
Article is not about current topic
Link to article:  https://theprint.in/politics/sad-bsp-alliance-promises-free-power-up-to-400-units-quota-in-private-jobs-for-punjab-youth/708755/
Articles added:  958
Articles added:  959
Articles added:  960
Articles added:  961
Articles added:  962
Articles added

Articles added:  995
Article is not about current topic
Link to article:  https://theprint.in/thought-shot/owaisi-on-gandhis-truth-to-power-arvind-subramanian-calls-us-china-tired-boxers/309956/
Articles added:  995
Article is not about current topic
Link to article:  https://theprint.in/india/with-sharp-dig-at-pakistan-modi-asks-rest-of-south-asia-to-fight-terror-together/277326/
Articles added:  995
Articles added:  996
Articles added:  997
Articles added:  998
Articles added:  999
Articles added:  1000
Articles added:  1001
Article is not about current topic
Link to article:  https://theprint.in/opinion/pov/from-ola-to-zomato-to-social-media-i-now-hide-my-muslim-identity-everywhere/385137/
Articles added:  1001
Articles added:  1002
Articles added:  1003
Articles added:  1004
Article is not about current topic
Link to article:  https://theprint.in/india/8-people-protesting-with-anti-caa-nrc-rangoli-detained-in-chennai/342196/
Articles added:  1004
Article is not about current topic


Articles added:  1048
Articles added:  1049
Article is not about current topic
Link to article:  https://theprint.in/india/governance/green-tax-helped-delhi-collect-hundreds-of-crores-for-buses-so-where-are-they/143273/
Articles added:  1049
Article is not about current topic
Link to article:  https://theprint.in/features/watching-netflix-for-just-30-minutes-this-is-how-much-carbon-you-will-emit/376936/
Articles added:  1049
Articles added:  1050
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-manifesto-the-voice-of-an-isolated-man-says-rahul-gandhi-in-a-dig-at-pm-modi/218928/
Articles added:  1050
Articles added:  1051
Articles added:  1052
Articles added:  1053
Articles added:  1054
Article is not about current topic
Link to article:  https://theprint.in/world/kamala-harris-is-vocal-on-kashmir-unbreakable-bond-with-india-loves-her-idli-sambhar/480424/
Articles added:  1054
Articles added:  1055
Articles added:  1056
Article is not about current t

Articles added:  1080
Articles added:  1081
Article is not about current topic
Link to article:  https://theprint.in/trawling-twitter/kejriwals-chalisa-zaira-wasims-kashmir-blues-baba-sehgals-ode-to-shakira/359584/
Articles added:  1081
Articles added:  1082
Articles added:  1083
Articles added:  1084
Articles added:  1085
Articles added:  1086
Articles added:  1087
Article is not about current topic
Link to article:  https://theprint.in/opinion/side-effect-of-shivakumar-and-chidambarams-arrest-congress-gets-its-mojo-back/288054/
Articles added:  1087
Articles added:  1088
Articles added:  1089
Article is not about current topic
Link to article:  https://theprint.in/india/governance/rajnath-raises-issue-of-absentee-mps-in-parliament-as-govt-prepares-to-table-citizenship-bill/329544/
Articles added:  1089
Article is not about current topic
Link to article:  https://theprint.in/opinion/modi-govt-claiming-pok-is-great-for-diplomacy-but-wont-mean-much-militarily/293751/
Articles added:  10

Articles added:  1118
Articles added:  1119
Article is not about current topic
Link to article:  https://theprint.in/ani-press-releases/blamglam-aspires-to-be-the-one-stop-destination-for-entertainment-news/701074/
Articles added:  1119
Articles added:  1120
Article is not about current topic
Link to article:  https://theprint.in/india/amit-shah-seeks-support-for-caa-in-bihar-says-nitish-kumar-will-lead-nda-in-state-polls/350453/
Articles added:  1120
Articles added:  1121
Article is not about current topic
Link to article:  https://theprint.in/opinion/global-print/abki-baar-modi-must-stitch-a-better-friendship-with-biden-harris-us-sarkar/540715/
Articles added:  1121
Articles added:  1122
Articles added:  1123
Articles added:  1124
Articles added:  1125
Article is not about current topic
Link to article:  https://theprint.in/india/governance/gnctd-act-will-paralyse-governance-in-delhi-say-76-former-civil-servants/637377/
Articles added:  1125
Articles added:  1126
Article is not about

Articles added:  1183
Articles added:  1184
Articles added:  1185
Articles added:  1186
Link is already present in the dataframe
Link to article:  https://theprint.in/last-laughs/celestial-bodies-need-to-be-fed-and-delhi-tourists-arrive-to-enjoy-kashmir-freeze/303755/
Articles added:  1186
Articles added:  1187
Article is not about current topic
Link to article:  https://theprint.in/defence/army-jawan-killed-another-injured-as-pakistan-violates-ceasefire-along-loc-in-jks-poonch/514386/
Articles added:  1187
Article is not about current topic
Link to article:  https://theprint.in/india/arun-jaitleys-health-condition-still-critical-president-visits-him-at-aiims/277806/
Articles added:  1187
Articles added:  1188
Articles added:  1189
Articles added:  1190
Article is not about current topic
Link to article:  https://theprint.in/opinion/from-priyanka-chopra-to-doordarshan-patriotism-today-is-a-show-of-love-for-indian-military/277440/
Articles added:  1190
Article is not about current topic

Articles added:  1222
Articles added:  1223
Article is not about current topic
Link to article:  https://theprint.in/politics/those-with-history-of-making-bihar-bimaru-wont-be-allowed-to-return-says-pm-modi/529446/
Articles added:  1223
Articles added:  1224
Articles added:  1225
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/primetime/rajdeep-rubika-question-congress-on-poll-losses-republic-bharat-attacks-gupkar-gang/545848/
Articles added:  1225
Articles added:  1226
Article is not about current topic
Link to article:  https://theprint.in/india/public-debate-needed-on-uniform-civil-code-up-to-govt-to-bring-bill-says-hosabale-of-rss/530689/
Articles added:  1226
Article is not about current topic
Link to article:  https://theprint.in/politics/marauding-bjp-sena-listless-opposition-maharashtra-most-lopsided-polls/308614/
Articles added:  1226
Articles added:  1227
Articles added:  1228
Articles added:  1229
Articles added:  1230
Article is not about

Articles added:  1270
Articles added:  1271
Article is not about current topic
Link to article:  https://theprint.in/ani-press-releases/a-story-of-revenge-and-power-meet-the-angre-family-of-cartel/711692/
Articles added:  1271
Article is not about current topic
Link to article:  https://theprint.in/politics/yogi-nitish-duel-on-caa-is-ndas-fixed-game-for-bihar-says-tejashwis-party/537714/
Articles added:  1271
Articles added:  1272
Articles added:  1273
Article is not about current topic
Link to article:  https://theprint.in/opinion/data-shows-upper-caste-voters-of-both-bjp-and-congress-support-core-hindutva-issues/320208/
Articles added:  1273
Articles added:  1274
Articles added:  1275
Articles added:  1276
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/alert-in-kutch-is-tois-lead-express-ht-go-with-pakistan-missile-test-hindu-flags-nrc/284274/
Articles added:  1276
Articles added:  1277
Articles added:  1278
Article is not about current topic
Link

In [None]:
#Checkpointing the dataframe to disk as a zip-compressed pickle, then displaying its (rows, columns)
pd.to_pickle(theprint_df, "theprint_dataframe", compression="zip")
theprint_df.shape

(2414, 6)

#### 4. Sabarimala

In [None]:
#Topic for this section; it picks the pickled link list to load and is passed to link_to_article below
topic = "sabarimala"

#Loading the links stored for this topic (the link count printed below comes from this call)
links_list = links_unpickle(topic)

Number of links on the sabarimala topic is 429.


In [None]:
#Removing duplicate links while keeping the first-seen order.
#dict.fromkeys is used instead of set() because the iteration order of a set of
#strings changes between kernel sessions (string hash randomisation), which made
#the crawl order, and therefore the log printed by the next cell, differ on every run.
links_list = list(dict.fromkeys(links_list))

#Dropping links that have the term 'video' in them
links_list = [link for link in links_list if 'video' not in link]
print("Number of links after cleaning: ", len(links_list))

Number of links after cleaning:  418


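Fetching the article behind each link and adding it to the dataframe. Articles that are not about the current topic, and links that are already present in the dataframe, are skipped.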

In [None]:
#Running link_to_article (defined earlier in the notebook) over the cleaned links for the current topic
#Collecting the links that were not added to the dataframe
#Judging by the variable names, the first list presumably holds links whose article is about
#another topic and the second holds links that raised an exception - confirm against link_to_article
topic_other_list_4, exceptions_links_list_4 = link_to_article(links_list, topic)

Article is not about current topic
Link to article:  https://theprint.in/politics/how-pm-modi-transforms-into-campaigner-modi-as-he-mixes-official-trips-with-politics/191484/
Articles added:  0
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/questionable-answers-and-keralas-wall-of-unity/172527/
Articles added:  0
Articles added:  1
Article is not about current topic
Link to article:  https://theprint.in/ani-press-releases/chetu-expands-global-offices-on-three-continents-amid-sustained-rapid-growth/697172/
Articles added:  1
Article is not about current topic
Link to article:  https://theprint.in/politics/single-seat-in-bengal-dip-in-kerala-tally-why-muslim-parties-didnt-do-well-this-time/651929/
Articles added:  1
Article is not about current topic
Link to article:  https://theprint.in/ani-press-releases/the-international-institute-of-academic-research-and-publications-in-india-launches-open-access-publishing-hub-with-f1000/702351/
Articles added: 

Articles added:  9
Link is already present in the dataframe
Link to article:  https://theprint.in/judiciary/a-year-counting-clock-ticking-but-sc-verdict-yet-awaited-on-over-20-pleas-on-article-370-scrapping/577946/
Articles added:  9
Articles added:  10
Link is already present in the dataframe
Link to article:  https://theprint.in/judiciary/mehbooba-muftis-daughter-seeks-sc-permission-for-her-mother-to-conduct-political-activity/509010/
Articles added:  10
Articles added:  11
Link is already present in the dataframe
Link to article:  https://theprint.in/india/2019-communication-blackout-pakistan-why-2020-saw-spike-in-kashmir-youth-joining-militants/580019/
Articles added:  11
Link is already present in the dataframe
Link to article:  https://theprint.in/last-laughs/bollywoods-silence-over-tanushrees-allegations-india-celebrates-the-sabarimala-verdict/126883/
Articles added:  11
Link is already present in the dataframe
Link to article:  https://theprint.in/india/doubling-farmers-income-

Articles added:  26
Articles added:  27
Article is not about current topic
Link to article:  https://theprint.in/india/governance/bihar-dgp-gupteshwar-pandeys-case-rare-approval-of-vrs-plea-by-ias-ips-takes-month-or-two/509039/
Articles added:  27
Article is not about current topic
Link to article:  https://theprint.in/opinion/how-nehru-and-yogi-adityanath-differed-on-naming-cities-from-chandigarh-to-prayagraj/151329/
Articles added:  27
Article is not about current topic
Link to article:  https://theprint.in/india/governance/lateral-entry-experts-have-become-like-any-ias-officer-a-year-into-recruitment/505476/
Articles added:  27
Article is not about current topic
Link to article:  https://theprint.in/india/governance/women-challenge-nizamuddin-dargah-ban-which-is-in-place-because-dead-can-see-them-naked/160743/
Articles added:  27
Articles added:  28
Article is not about current topic
Link to article:  https://theprint.in/india/governance/modi-govt-plans-india75-booklet-will-list-pro

Articles added:  38
Article is not about current topic
Link to article:  https://theprint.in/india/church-committed-land-fraud-in-andhra-canadian-donors-allege-fight-to-repair-damage/548563/
Articles added:  38
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/pm-modi-on-front-pages-et-mint-tick-upturn-in-manufacturing-hotel-cons-in-hindu-nie/359328/
Articles added:  38
Article is not about current topic
Link to article:  https://theprint.in/opinion/how-three-key-supreme-court-judgments-shook-the-faith-of-scs-sts-and-obcs-in-one-year/184178/
Articles added:  38
Article is not about current topic
Link to article:  https://theprint.in/india/bjp-mps-say-save-india-urge-modi-govt-to-bring-in-population-control-law/509248/
Articles added:  38
Articles added:  39
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-provoking-tensions-in-temples-andhra-cm-chandrababu-naidu-at-mega-opposition-rally/180261/
Articles added:  39
A

Articles added:  53
Articles added:  54
Articles added:  55
Articles added:  56
Articles added:  57
Article is not about current topic
Link to article:  https://theprint.in/judiciary/7-11-blasts-14-yrs-on-convicts-still-blame-indian-mujahideen-rely-on-police-documents/578461/
Articles added:  57
Article is not about current topic
Link to article:  https://theprint.in/opinion/if-we-had-filmfare-awards-for-real-life-these-would-be-the-winners-of-2018/170459/
Articles added:  57
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/primetime/pms-tears-move-news-anchors-aaj-tak-tears-into-uddhav-governments-attack-on-desh-prem/602301/
Articles added:  57
Article is not about current topic
Link to article:  https://theprint.in/opinion/jaggi-vasudev-is-the-ducati-riding-guru-for-the-modi-age/206289/
Articles added:  57
Link is already present in the dataframe
Link to article:  https://theprint.in/india/not-just-farm-laws-sugarcane-price-delayed-payments-also-cau

Articles added:  66
Article is not about current topic
Link to article:  https://theprint.in/politics/governance-not-ideology-how-keralas-ldf-managed-a-historic-return-to-power-under-pinarayi/650907/
Articles added:  66
Articles added:  67
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/tharoor-calls-kejriwal-a-rude-name-on-cnn-news-18-arnab-goswamis-gandhi-vadra-expose/349190/
Articles added:  67
Articles added:  68
Article is not about current topic
Link to article:  https://theprint.in/health/this-delhi-doctor-doesnt-wear-full-ppe-when-treating-covid-19-patients-heres-why/505572/
Articles added:  68
Link is already present in the dataframe
Link to article:  https://theprint.in/politics/set-up-a-jpc-to-look-into-rafale-scam-says-aap-petitioner-sanjay-singh/163822/
Articles added:  68
Articles added:  69
Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/if-india-keeps-diluting-its-liberal-character-the-west-will-

Article is not about current topic
Link to article:  https://theprint.in/ani-press-releases/best-crop-science-llp-becomes-first-agrochemical-company-in-india-to-manufacture-trifloxystrobin-technical/701871/
Articles added:  91
Articles added:  92
Link is already present in the dataframe
Link to article:  https://theprint.in/india/kashmiri-pandits-body-chief-on-fast-unto-death-demands-jk-govt-address-communitys-demands/507991/
Articles added:  92
Article is not about current topic
Link to article:  https://theprint.in/talk-point/tipu-jayanti-goes-to-karnataka-hc-sabarimala-style-flashpoint-or-real-historical-debate/147370/
Articles added:  92
Articles added:  93
Article is not about current topic
Link to article:  https://theprint.in/opinion/afghanistan-is-indias-one-reliable-neighbour-but-hekmatyars-surprise-entry-can-change-this/510017/
Articles added:  93
Articles added:  94
Articles added:  95
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-not-

Articles added:  106
Article is not about current topic
Link to article:  https://theprint.in/politics/congress-promise-of-law-protecting-sabarimala-tradition-not-a-u-turn-says-oommen-chandy/630499/
Articles added:  106
Article is not about current topic
Link to article:  https://theprint.in/opinion/bjp-leaders-cant-be-faulted-for-rushing-to-kumbh-mela-they-would-be-foolish-not-to/187181/
Articles added:  106
Article is not about current topic
Link to article:  https://theprint.in/politics/how-left-in-kerala-is-invoking-both-marx-and-market-to-keep-power-in-this-election/632376/
Articles added:  106
Article is not about current topic
Link to article:  https://theprint.in/national-interest/kanwarias-vs-halloweeners-indias-new-political-faultline/147733/
Articles added:  106
Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/pakistans-upgrade-of-gilgit-baltistan-is-linked-to-the-india-china-stand-off/506599/
Articles added:  106
Article is not about cu

In [None]:
#Saving the dataframe to disk
#Checkpoint after the crawl above: written as a zip-compressed pickle to the relative
#path "theprint_dataframe" (no file extension, so the compression is named explicitly).
#Every save cell uses this same path, so each checkpoint replaces the previous one.
theprint_df.to_pickle("theprint_dataframe", compression="zip")
#Last expression of the cell: displays (rows, columns) as a quick sanity check
theprint_df.shape

(2524, 6)

#### 5. Section 377

In [None]:
#Setting the topic
#The same string is passed to links_unpickle here and to link_to_article further down
topic = 'section 377'

#Unpickling the links
#links_unpickle is defined earlier in the notebook; presumably it loads the saved list of
#links collected for this topic and prints how many there are - verify against its definition
links_list = links_unpickle(topic)

Number of links on the section 377 topic is 430.


In [None]:
#Removing duplicate links while keeping the order in which they were first seen.
#list(set(...)) would reorder the links differently on every kernel start (string
#hashes are randomised per process), which makes the crawl order, and therefore the
#row order of the dataframe, impossible to reproduce. dict.fromkeys keeps insertion order.
links_list = list(dict.fromkeys(links_list))

#Dropping links that have the term 'video' in them
#(plain substring test on the whole URL, so any link containing 'video' anywhere is dropped)
links_list = [link for link in links_list if 'video' not in link]
print("Number of links after cleaning: ", len(links_list))

Number of links after cleaning:  398


Adding these articles to the dataframe.

In [None]:
#Running link_to_article (defined earlier in the notebook) over the cleaned links for the current topic
#Collecting the links that were not added to the dataframe
#Judging by the variable names, the first list presumably holds links whose article is about
#another topic and the second holds links that raised an exception - confirm against link_to_article
topic_other_list_5, exceptions_links_list_5 = link_to_article(links_list, topic)

Article is not about current topic
Link to article:  https://theprint.in/politics/how-pm-modi-transforms-into-campaigner-modi-as-he-mixes-official-trips-with-politics/191484/
Articles added:  0
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/questionable-answers-and-keralas-wall-of-unity/172527/
Articles added:  0
Link is already present in the dataframe
Link to article:  https://theprint.in/politics/shashi-tharoors-article-kicks-up-a-new-storm-on-sabarimala-splits-opinion/148163/
Articles added:  0
Article is not about current topic
Link to article:  https://theprint.in/ani-press-releases/chetu-expands-global-offices-on-three-continents-amid-sustained-rapid-growth/697172/
Articles added:  0
Article is not about current topic
Link to article:  https://theprint.in/opinion/i-am-a-sexual-abuse-survivor-no-i-am-not-a-woman/641099/
Articles added:  0
Link is already present in the dataframe
Link to article:  https://theprint.in/india/sabarimala-pilgrimag

Articles added:  6
Articles added:  7
Link is already present in the dataframe
Link to article:  https://theprint.in/features/techies-may-have-answer-to-sabarimala-ayodhya-rows-but-are-temples-or-mosques-ready/178685/
Articles added:  7
Link is already present in the dataframe
Link to article:  https://theprint.in/last-laughs/bollywoods-silence-over-tanushrees-allegations-india-celebrates-the-sabarimala-verdict/126883/
Articles added:  7
Article is not about current topic
Link to article:  https://theprint.in/india/uttarakhand-hc-questions-constitutionality-of-ban-on-slaughterhouses-in-haridwar-district/698007/
Articles added:  7
Articles added:  8
Article is not about current topic
Link to article:  https://theprint.in/opinion/guide-for-nris-resettling-in-india-after-trump-throws-them-out/159397/
Articles added:  8
Article is not about current topic
Link to article:  https://theprint.in/india/long-way-to-go-in-india-say-activists-on-zero-tolerance-to-female-genital-mutilation-day/6001

Articles added:  17
Article is not about current topic
Link to article:  https://theprint.in/india/whatsapp-group-tracker-mole-how-transport-mafia-planned-attack-on-ambala-ias-officer/508719/
Articles added:  17
Link is already present in the dataframe
Link to article:  https://theprint.in/plugged-in/newspapers-all-cheer-for-ins-arihant-and-indias-nuclear-triad/145907/
Articles added:  17
Article is not about current topic
Link to article:  https://theprint.in/india/minor-boys-from-ne-sexually-abused-in-ups-muzaffarnagar-ashram-godman-disciple-arrested/459187/
Articles added:  17
Article is not about current topic
Link to article:  https://theprint.in/india/congress-releases-list-of-86-candidates-for-kerala-assembly-polls/621786/
Articles added:  17
Link is already present in the dataframe
Link to article:  https://theprint.in/india/supreme-court-directs-kerala-govt-to-draw-up-law-on-administration-of-sabarimala-temple/323744/
Articles added:  17
Article is not about current topic
Link

Articles added:  19
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/section-377-at-supreme-courts-doorstep-and-mumbais-latest-local-transport/82225/
Articles added:  19
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/terrorism-has-no-religion-says-ravish-kumar-india-todays-questions-on-dsp-davinder-singh/349739/
Articles added:  19
Link is already present in the dataframe
Link to article:  https://theprint.in/theprint-essential/what-the-sabarimala-review-petition-supreme-court-will-rule-on-today-is-all-about/320668/
Articles added:  19
Article is not about current topic
Link to article:  https://theprint.in/judiciary/sc-stays-gujarat-hc-order-granting-two-week-furlough-to-asaram-bapus-son-narayan-sai/713710/
Articles added:  19
Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/forgive-me-liberal-friends-but-i-cant-completely-overlook-faith-of-sabarimala-devotees/174990/
Article

Articles added:  24
Link is already present in the dataframe
Link to article:  https://theprint.in/judiciary/sc-9-judge-bench-on-sabarimala-to-now-hear-larger-issues-of-faith-vs-fundamental-rights/348671/
Articles added:  24
Link is already present in the dataframe
Link to article:  https://theprint.in/pre-truth/president-kovind-leaves-many-amused-how-a-wedding-invite-changed-rahul-gandhis-plans/92983/
Articles added:  24
Article is not about current topic
Link to article:  https://theprint.in/india/role-played-by-the-crucial-akhil-bharatiya-pratinidhi-sabha-of-rss-which-meets-this-week/620557/
Articles added:  24
Articles added:  25
Article is not about current topic
Link to article:  https://theprint.in/opinion/a-wish-list-your-honour-how-the-indian-judiciary-can-get-back-its-prestige-in-2019/171142/
Articles added:  25
Link is already present in the dataframe
Link to article:  https://theprint.in/india/faced-with-financial-crunch-sabarimala-1200-kerala-temples-plan-to-monetise-gold/

Articles added:  28
Article is not about current topic
Link to article:  https://theprint.in/india/rajkot-says-94-covid-deaths-in-3-months-but-crematorium-burial-ground-data-puts-it-at-1247/510172/
Articles added:  28
Article is not about current topic
Link to article:  https://theprint.in/opinion/mourn-idea-of-india-but-dont-forget-idea-of-people/488884/
Articles added:  28
Article is not about current topic
Link to article:  https://theprint.in/politics/between-the-lines-of-a-speech-by-chief-of-apolitical-rss-lies-an-overtly-political-pitch/136637/
Articles added:  28
Link is already present in the dataframe
Link to article:  https://theprint.in/india/39-covid-cases-in-sabarimala-since-pilgrim-season-began-last-week-temple-to-ramp-up-testing/553629/
Articles added:  28
Articles added:  29
Article is not about current topic
Link to article:  https://theprint.in/opinion/can-india-be-a-hindu-rashtra-and-hindus-a-minority-at-the-same-time/176361/
Articles added:  29
Article is not about 

Articles added:  34
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/virat-kohli-end-of-the-world-moment-in-express-aatish-taseer-cries-foul-in-india-today/320850/
Articles added:  34
Articles added:  35
Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/by-siding-with-sabarimala-devotees-shashi-tharoor-has-failed-the-liberal-in-him/176907/
Articles added:  35
Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/telescope/ranveer-singh-deepika-padukone-wedding-has-finally-edged-out-breathless-rafale-debates/149550/
Articles added:  35
Link is already present in the dataframe
Link to article:  https://theprint.in/politics/in-an-u-turn-rahul-gandhi-now-says-he-cannot-take-an-open-and-shut-position-on-sabarimala/177555/
Articles added:  35
Articles added:  36
Link is already present in the dataframe
Link to article:  https://theprint.in/india/governance/two-women-begin-trekking-up-saba

Articles added:  39
Article is not about current topic
Link to article:  https://theprint.in/opinion/udf-had-a-chance-in-kerala-then-congress-played-a-dangerous-communal-game/627244/
Articles added:  39
Article is not about current topic
Link to article:  https://theprint.in/india/bjp-mp-subramanian-swamy-now-wants-to-reclaim-kashi-vishwanath-temple/501724/
Articles added:  39
Article is not about current topic
Link to article:  https://theprint.in/features/10-apps-to-take-you-from-casual-sex-to-marriage-vows/183478/
Articles added:  39
Link is already present in the dataframe
Link to article:  https://theprint.in/india/as-sabarimala-opens-kerala-cops-send-back-10-women-of-menstruating-age/322203/
Articles added:  39
Link is already present in the dataframe
Link to article:  https://theprint.in/last-laughs/rahuls-rafale-roar-congress-double-standards-on-sabarimala-and-it-raids-in-sandalwood/173061/
Articles added:  39
Article is not about current topic
Link to article:  https://theprin

Articles added:  43
Link is already present in the dataframe
Link to article:  https://theprint.in/india/governance/another-sabarimala-like-crisis-is-brewing-in-kerala/163897/
Articles added:  43
Article is not about current topic
Link to article:  https://theprint.in/politics/india-belongs-to-hindus-says-rss-chief-mohan-bhagwat-bats-for-bjp-in-temple-row/186637/
Articles added:  43
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/give-peace-and-sidhu-a-chance-says-imran-khan/102735/
Articles added:  43
Articles added:  44
Article is not about current topic
Link to article:  https://theprint.in/judiciary/firm-meticulous-justice-indu-malhotra-retires-after-short-sc-stint-of-less-than-3-years/621376/
Articles added:  44
Articles added:  45
Link is already present in the dataframe
Link to article:  https://theprint.in/national-interest/bjp-could-be-shrinking-politically-but-is-winning-big-time-ideologically/328397/
Articles added:  45
Link is already pre

In [None]:
#Saving the dataframe to disk
#Checkpoint after the crawl above: written as a zip-compressed pickle to the relative
#path "theprint_dataframe" (no file extension, so the compression is named explicitly).
#Every save cell uses this same path, so each checkpoint replaces the previous one.
theprint_df.to_pickle("theprint_dataframe", compression="zip")
#Last expression of the cell: displays (rows, columns) as a quick sanity check
theprint_df.shape

(2569, 6)

#### 6. CAA

In [None]:
#Setting the topic
#The same string is passed to links_unpickle here and to link_to_article further down
topic = 'caa'

#Unpickling the links
#links_unpickle is defined earlier in the notebook; presumably it loads the saved list of
#links collected for this topic and prints how many there are - verify against its definition
links_list = links_unpickle(topic)

Number of links on the caa topic is 1826.


In [None]:
#Removing duplicate links while keeping the order in which they were first seen.
#list(set(...)) would reorder the links differently on every kernel start (string
#hashes are randomised per process), which makes the crawl order, and therefore the
#row order of the dataframe, impossible to reproduce. dict.fromkeys keeps insertion order.
links_list = list(dict.fromkeys(links_list))

#Dropping links that have the term 'video' in them
#(plain substring test on the whole URL, so any link containing 'video' anywhere is dropped)
links_list = [link for link in links_list if 'video' not in link]
print("Number of links after cleaning: ", len(links_list))

Number of links after cleaning:  1789


Adding these articles to the dataframe.

In [None]:
#Running link_to_article (defined earlier in the notebook) over the cleaned links for the current topic
#Collecting the links that were not added to the dataframe
#Judging by the variable names, the first list presumably holds links whose article is about
#another topic and the second holds links that raised an exception - confirm against link_to_article
topic_other_list_6, exceptions_links_list_6 = link_to_article(links_list, topic)

Article is not about current topic
Link to article:  https://theprint.in/india/zuckerberg-uses-kapil-mishras-delhi-riots-threat-to-outline-facebooks-hate-speech-policy/435845/
Articles added:  0
Link is already present in the dataframe
Link to article:  https://theprint.in/politics/shashi-tharoors-article-kicks-up-a-new-storm-on-sabarimala-splits-opinion/148163/
Articles added:  0
Articles added:  1
Article is not about current topic
Link to article:  https://theprint.in/opinion/india-blames-external-forces-for-export-mess-but-problem-is-internal-budget-2020-can-help/354860/
Articles added:  1
Article is not about current topic
Link to article:  https://theprint.in/opinion/akhilesh-yadav-is-counting-his-chickens-before-they-hatch/366788/
Articles added:  1
Articles added:  2
Link is already present in the dataframe
Link to article:  https://theprint.in/india/postpaid-mobile-services-in-kashmir-likely-to-resume-from-monday/304896/
Articles added:  2
Article is not about current topic
Li

Articles added:  16
Article is not about current topic
Link to article:  https://theprint.in/opinion/nhrc-chief-hl-dattu-will-quit-office-this-year-with-him-the-rights-body-must-wind-up-too/429996/
Articles added:  16
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/mirror-now-news-18-focus-on-bengaluru-riots-abps-honey-trap-twist-in-sushant-case/480630/
Articles added:  16
Article is not about current topic
Link to article:  https://theprint.in/opinion/indian-media-waging-holy-war-against-muslims-hyenas/400407/
Articles added:  16
Link is already present in the dataframe
Link to article:  https://theprint.in/theprint-essential/the-religious-significance-of-the-18-steps-at-sabarimala-temple/173206/
Articles added:  16
Article is not about current topic
Link to article:  https://theprint.in/india/up-govt-stubborn-like-a-child-can-frame-me-in-another-case-says-kafeel-khan-after-release/493925/
Articles added:  16
Article is not about current topic
Link 

Articles added:  39
Articles added:  40
Articles added:  41
Link is already present in the dataframe
Link to article:  https://theprint.in/india/sc-seeks-response-from-centre-jammu-kashmir-on-plea-seeking-to-produce-farooq-abdullah/292026/
Articles added:  41
Articles added:  42
Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/section-377-ruling-supreme-court-has-evolved-but-politicians-need-to-play-catch-up/112694/
Articles added:  42
Article is not about current topic
Link to article:  https://theprint.in/opinion/why-its-so-easy-for-modi-to-appropriate-tagore-or-anyone/594094/
Articles added:  42
Articles added:  43
Link is already present in the dataframe
Link to article:  https://theprint.in/india/only-40-gst-returns-filed-in-jk-due-to-internet-blockade-big-fall-in-revenue-collection/323492/
Articles added:  43
Articles added:  44
Articles added:  45
Article is not about current topic
Link to article:  https://theprint.in/opinion/lets-talk-abou

Articles added:  63
Article is not about current topic
Link to article:  https://theprint.in/india/yogendra-yadav-ram-guha-urge-nationalisation-of-property-to-fight-covid-step-back-after-row/427898/
Articles added:  63
Link is already present in the dataframe
Link to article:  https://theprint.in/statedraft/next-door-to-sabarimala-temple-district-a-menstruating-goddess-is-worshipped/173322/
Articles added:  63
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/chidambaram-shivkumar-silly-old-fools-arnab-media-see-red-in-growth-blues/286323/
Articles added:  63
Link is already present in the dataframe
Link to article:  https://theprint.in/features/from-rbi-cbi-to-metoo-rafale-here-are-the-top-10-newsmakers-of-2018/170191/
Articles added:  63
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/primetime/corona-gyaan-across-channels-tharoor-sibal-as-head-shoulders-on-times-now/380235/
Articles added:  63
Article is not about

Articles added:  80
Article is not about current topic
Link to article:  https://theprint.in/best-of-theprint-icymi/how-a-woman-who-couldnt-go-to-iit-because-she-got-married-became-isros-top-engineer/113965/
Articles added:  80
Article is not about current topic
Link to article:  https://theprint.in/opinion/global-print/from-modi-to-imran-khan-trumps-visit-to-delhi-holds-a-victory-for-all-sides/352380/
Articles added:  80
Link is already present in the dataframe
Link to article:  https://theprint.in/india/supreme-court-asks-centre-to-restore-normalcy-in-kashmir/292013/
Articles added:  80
Articles added:  81
Link is already present in the dataframe
Link to article:  https://theprint.in/india/governance/govt-to-procure-apples-directly-from-jammu-kashmir-farmers/289442/
Articles added:  81
Articles added:  82
Articles added:  83
Articles added:  84
Article is not about current topic
Link to article:  https://theprint.in/opinion/indias-young-are-no-more-charmed-by-modi-economy-and-ideolog

Articles added:  98
Article is not about current topic
Link to article:  https://theprint.in/india/how-former-punjab-dgp-kps-gill-approached-hit-back-at-terror-recalls-new-book/351413/
Articles added:  98
Link is already present in the dataframe
Link to article:  https://theprint.in/last-laughs/how-metoo-movement-forced-m-j-akbar-to-quit-and-the-sabarimala-protest/136490/
Articles added:  98
Article is not about current topic
Link to article:  https://theprint.in/opinion/foreign-media-loves-anti-bjp-champagne-socialists-columnists/415968/
Articles added:  98
Article is not about current topic
Link to article:  https://theprint.in/talk-point/sp-sinha-kashmir-rape-comment-is-it-time-for-strict-code-of-conduct-for-military-veterans/322663/
Articles added:  98
Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/why-sabarimala-issue-leaves-instinctive-liberals-like-me-torn-shashi-tharoor/147759/
Articles added:  98
Link is already present in the dataframe


Articles added:  110
Article is not about current topic
Link to article:  https://theprint.in/opinion/global-print/why-pompeo-called-pakistan-army-chief-bajwa-and-not-jaishankar-after-soleimani-killing/345755/
Articles added:  110
Articles added:  111
Article is not about current topic
Link to article:  https://theprint.in/opinion/pov/us-is-indias-enemy-when-it-calls-our-treatment-of-minorities-friend-when-trump-hugs-modi/411372/
Articles added:  111
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-subverting-democracy-in-maharashtra-sabotaging-three-party-alliance-sonia-gandhi/327588/
Articles added:  111
Articles added:  112
Link is already present in the dataframe
Link to article:  https://theprint.in/india/governance/homes-of-cpm-leaders-bombed-in-kerala-amid-violence-over-sabarimala-row/173555/
Articles added:  112
Articles added:  113
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-not-an-election-winning-

Articles added:  136
Link is already present in the dataframe
Link to article:  https://theprint.in/india/opposition-asks-govt-to-shed-its-arrogance-over-farm-laws/602280/
Articles added:  136
Articles added:  137
Articles added:  138
Article is not about current topic
Link to article:  https://theprint.in/opinion/arnab-goswamis-attack-continues-rahul-priyanka-wheres-your-enough-is-enough-moment/407651/
Articles added:  138
Articles added:  139
Articles added:  140
Articles added:  141
Article is not about current topic
Link to article:  https://theprint.in/thought-shot/covid-19-kaushik-basu-wants-global-task-force-bahar-dutt-sees-a-silver-lining/379644/
Articles added:  141
Article is not about current topic
Link to article:  https://theprint.in/india/governance/ias-officers-say-no-recourse-left-against-arbitrary-appraisal-system-after-hc-upholds-it/377626/
Articles added:  141
Articles added:  142
Article is not about current topic
Link to article:  https://theprint.in/india/police-r

Articles added:  158
Article is not about current topic
Link to article:  https://theprint.in/india/nia-ed-probe-link-between-kerala-gold-smuggling-racket-and-pfi-anti-caa-protests/469895/
Articles added:  158
Article is not about current topic
Link to article:  https://theprint.in/india/sc-seeks-delhi-govts-response-on-sharjeel-imams-plea-for-clubbing-5-firs/412413/
Articles added:  158
Articles added:  159
Article is not about current topic
Link to article:  https://theprint.in/india/al-qaeda-terrorist-from-telangana-could-be-key-to-understanding-terror-groups-india-network/427662/
Articles added:  159
Article is not about current topic
Link to article:  https://theprint.in/india/rss-has-no-connection-with-politics-works-for-all-130-crore-indians-mohan-bhagwat/351600/
Articles added:  159
Articles added:  160
Article is not about current topic
Link to article:  https://theprint.in/politics/after-nrc-u-turn-nitish-at-crossroads-again-as-deputy-sushil-modi-announces-npr-in-bihar/344981

Articles added:  184
Article is not about current topic
Link to article:  https://theprint.in/opinion/state-of-congress-has-reached-such-a-low-that-its-showing-in-its-assam-bengal-kerala-plan/594082/
Articles added:  184
Article is not about current topic
Link to article:  https://theprint.in/opinion/no-matter-what-sc-decides-on-ayodhya-the-assertive-hindu-is-here-to-stay/307593/
Articles added:  184
Articles added:  185
Article is not about current topic
Link to article:  https://theprint.in/last-laughs/imran-khan-keeps-a-close-watch-as-the-sino-indian-rock-balances-on-thin-ice/304859/
Articles added:  185
Articles added:  186
Article is not about current topic
Link to article:  https://theprint.in/best-of-theprint-icymi/yogi-govt-wont-defy-tradition-will-hold-ayodhya-ram-navami-mela-despite-covid-19-threat/384529/
Articles added:  186
Articles added:  187
Link is already present in the dataframe
Link to article:  https://theprint.in/india/ajit-doval-says-not-one-bullet-fired-in-kashm

Articles added:  198
Article is not about current topic
Link to article:  https://theprint.in/politics/chandrashekhar-azad-demands-roll-back-of-lateral-entry-policy-threatens-parliament-gherao/601160/
Articles added:  198
Article is not about current topic
Link to article:  https://theprint.in/campus-voice/pegasus-scandal-shows-we-are-living-in-george-orwells-1984/704434/
Articles added:  198
Article is not about current topic
Link to article:  https://theprint.in/world/preliminarily-tests-show-two-people-in-singapore-have-uk-covid-strain/576905/
Articles added:  198
Article is not about current topic
Link to article:  https://theprint.in/features/kabir-khans-new-show-on-netajis-ina-will-ask-questions-about-history-and-idea-of-india/353493/
Articles added:  198
Article is not about current topic
Link to article:  https://theprint.in/thought-shot/yashwant-sinha-rcep-decision-cowardice-take-land-in-ayodhya-salman-khurshid-to-muslims/320874/
Articles added:  198
Article is not about curre

Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/rss-has-changed-its-position-on-homosexuality-can-islamic-clerics-do-the-same/81529/
Articles added:  217
Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/public-opinion-in-kashmir-has-now-been-pushed-away-from-india/323631/
Articles added:  217
Articles added:  218
Articles added:  219
Articles added:  220
Link is already present in the dataframe
Link to article:  https://theprint.in/best-of-theprint-icymi/new-ias-ips-irs-officers-will-adopt-mentor-a-teen-under-modi-govts-unique-programme/321628/
Articles added:  220
Articles added:  221
Articles added:  222
Link is already present in the dataframe
Link to article:  https://theprint.in/india/jk-sarpanch-official-killed-in-militant-attack-during-back-to-village-event-in-anantnag/326758/
Articles added:  222
Link is already present in the dataframe
Link to article:  https://theprint.in/judiciary/sc-9-judge-bench-o

Articles added:  251
Link is already present in the dataframe
Link to article:  https://theprint.in/india/after-3-killings-in-one-day-traders-told-to-load-trucks-on-highways-main-roads/307338/
Articles added:  251
Articles added:  252
Articles added:  253
Article is not about current topic
Link to article:  https://theprint.in/opinion/pov/tv-anchors-outrage-over-uyghurs-in-china-but-stay-quiet-about-indian-muslims/459921/
Articles added:  253
Article is not about current topic
Link to article:  https://theprint.in/judiciary/modi-govt-says-no-to-elevation-of-disco-dancer-lawyer-as-punjab-haryana-hc-judge/410071/
Articles added:  253
Article is not about current topic
Link to article:  https://theprint.in/opinion/congress-has-split-into-three-parties-and-not-one-of-them-is-good-at-politics/428824/
Articles added:  253
Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/bjp-got-blinded-by-article-370-success-now-its-getting-a-reality-check/633902/
Articl

Articles added:  265
Articles added:  266
Articles added:  267
Articles added:  268
Article is not about current topic
Link to article:  https://theprint.in/defence/indian-forces-back-to-peacetime-alert-after-balakot-but-forward-bases-with-pakistan-active/263748/
Articles added:  268
Link is already present in the dataframe
Link to article:  https://theprint.in/india/jk-to-build-3-km-long-road-on-dal-lake-western-shore-to-stop-encroachment/330351/
Articles added:  268
Articles added:  269
Article is not about current topic
Link to article:  https://theprint.in/opinion/how-the-section-377-ruling-is-linked-to-the-problem-of-judges-appointments/114906/
Articles added:  269
Link is already present in the dataframe
Link to article:  https://theprint.in/india/jk-police-not-only-detaining-minors-but-making-them-pay-for-food-in-custody-say-families/319677/
Articles added:  269
Article is not about current topic
Link to article:  https://theprint.in/india/kashmiri-student-alleges-oyo-room-in-de

Articles added:  290
Articles added:  291
Link is already present in the dataframe
Link to article:  https://theprint.in/india/ghulam-nabi-azad-moves-supreme-court-to-visit-family-in-j-k-check-on-social-conditions/291899/
Articles added:  291
Articles added:  292
Articles added:  293
Article is not about current topic
Link to article:  https://theprint.in/thought-shot/sanjaya-baru-on-derailed-rising-india-arvind-panagriya-says-dont-fear-china-in-rcep/321482/
Articles added:  293
Articles added:  294
Article is not about current topic
Link to article:  https://theprint.in/opinion/does-modi-govt-know-what-the-endgame-of-the-crisis-it-has-unleashed-is/338117/
Articles added:  294
Articles added:  295
Articles added:  296
Articles added:  297
Article is not about current topic
Link to article:  https://theprint.in/theprint-essential/the-nso-group-behind-pegasus-list-its-murky-past-from-mexico-to-jamal-khashoggi-to-india/700425/
Articles added:  297
Article is not about current topic
Link t

Articles added:  329
Article is not about current topic
Link to article:  https://theprint.in/india/situation-in-coronavirus-hit-italy-iran-a-cause-of-great-concern-says-s-jaishankar/379226/
Articles added:  329
Articles added:  330
Link is already present in the dataframe
Link to article:  https://theprint.in/defence/with-no-internet-on-mobile-networks-kashmir-militants-using-satellite-phones/327738/
Articles added:  330
Articles added:  331
Articles added:  332
Link is already present in the dataframe
Link to article:  https://theprint.in/defence/terror-recruitment-in-kashmir-this-year-lower-than-2017-steep-fall-after-august-clampdown/326000/
Articles added:  332
Articles added:  333
Articles added:  334
Article is not about current topic
Link to article:  https://theprint.in/talk-point/how-will-cji-ranjan-gogois-legacy-impact-the-supreme-court/321009/
Articles added:  334
Articles added:  335
Articles added:  336
Article is not about current topic
Link to article:  https://theprint.

Articles added:  357
Article is not about current topic
Link to article:  https://theprint.in/india/sc-st-pension-scheme-free-electricity-mamata-readies-for-polls-with-populist-budget/362821/
Articles added:  357
Article is not about current topic
Link to article:  https://theprint.in/world/pakistan-lists-israel-editor-among-those-spewing-anti-state-fake-news-amateurish-he-hits-back/714704/
Articles added:  357
Articles added:  358
Article is not about current topic
Link to article:  https://theprint.in/diplomacy/india-says-british-mp-debbie-abrahams-saw-ignored-visa-cancellation-email-and-landed-in-delhi/367424/
Articles added:  358
Article is not about current topic
Link to article:  https://theprint.in/india/modi-to-attend-netajis-125th-birth-anniversary-programmes-during-6-hour-long-kolkata-trip/589130/
Articles added:  358
Article is not about current topic
Link to article:  https://theprint.in/thought-shot/ajit-ranade-raja-mohan-saran-on-coronavirus-impact-cp-chandrasekhar-on-yes

Articles added:  380
Article is not about current topic
Link to article:  https://theprint.in/opinion/from-kashmir-to-kabul-why-pakistan-wants-extradition-of-iskp-leader-caught-in-afghanistan/400779/
Articles added:  380
Articles added:  381
Articles added:  382
Link is already present in the dataframe
Link to article:  https://theprint.in/india/20-students-kashmir-schools-100-attendance-jammu/308704/
Articles added:  382
Articles added:  383
Articles added:  384
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/narendra-modi-features-on-front-pages-with-new-found-cap/138300/
Articles added:  384
Articles added:  385
Link is already present in the dataframe
Link to article:  https://theprint.in/india/who-is-dr-mubeen-shah-the-kashmiri-whose-freedom-us-lawmakers-are-so-worried-about/322925/
Articles added:  385
Link is already present in the dataframe
Link to article:  https://theprint.in/india/no-separate-flag-constitution-but-govt-keen-to-conclude-nag

Articles added:  408
Article is not about current topic
Link to article:  https://theprint.in/opinion/indian-citizens-and-media-have-been-terrorised-enough-with-sedition-sc-must-end-it-now/442914/
Articles added:  408
Articles added:  409
Articles added:  410
Link is already present in the dataframe
Link to article:  https://theprint.in/plugged-in/jagran-lists-art-370-as-modi-2-0s-success-dinamani-calls-chandrayaan-2-isros-next-leap/291496/
Articles added:  410
Article is not about current topic
Link to article:  https://theprint.in/trawling-twitter/omar-abdullah-is-back-rahul-is-a-sad-kapil-mishra-amit-malviya-are-happy/387256/
Articles added:  410
Link is already present in the dataframe
Link to article:  https://theprint.in/last-laughs/modis-obsession-with-the-past-turncoat-politicians-indian-version-of-corona/361361/
Articles added:  410
Link is already present in the dataframe
Link to article:  https://theprint.in/india/govt-deal-no-lifesaver-for-kashmir-apple-growers-could-leave-

Articles added:  431
Articles added:  432
Article is not about current topic
Link to article:  https://theprint.in/opinion/covid-an-excuse-to-push-indian-muslims-out-of-informal-sector-jobs-apartheid-the-next-step/398236/
Articles added:  432
Articles added:  433
Article is not about current topic
Link to article:  https://theprint.in/india/delhi-court-extends-sharjeel-imams-police-custody-by-3-days-in-uapa-case/493037/
Articles added:  433
Article is not about current topic
Link to article:  https://theprint.in/opinion/politically-correct/up-cm-adityanath-emerges-as-modis-nearest-political-clone-another-potential-successor/409477/
Articles added:  433
Link is already present in the dataframe
Link to article:  https://theprint.in/world/trump-again-offers-to-mediate-kashmir-issue-says-india-pakistan-tensions-less-heated-now/289647/
Articles added:  433
Articles added:  434
Articles added:  435
Articles added:  436
Link is already present in the dataframe
Link to article:  https://thepri

Articles added:  453
Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/by-siding-with-sabarimala-devotees-shashi-tharoor-has-failed-the-liberal-in-him/176907/
Articles added:  453
Articles added:  454
Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/telescope/ranveer-singh-deepika-padukone-wedding-has-finally-edged-out-breathless-rafale-debates/149550/
Articles added:  454
Article is not about current topic
Link to article:  https://theprint.in/india/shaheen-bagh-now-has-a-media-registration-desk-to-ensure-no-provocative-reportage/361343/
Articles added:  454
Articles added:  455
Articles added:  456
Article is not about current topic
Link to article:  https://theprint.in/opinion/what-the-rebellion-of-alok-verma-tells-us-about-narendra-modi/140075/
Articles added:  456
Articles added:  457
Article is not about current topic
Link to article:  https://theprint.in/india/north-east-delhi-riots-part-of-well-hatched-c

Articles added:  478
Link is already present in the dataframe
Link to article:  https://theprint.in/india/with-article-370-gone-maharashtra-govt-plans-tourist-resorts-in-j-k-ladakh/286102/
Articles added:  478
Link is already present in the dataframe
Link to article:  https://theprint.in/diplomacy/japan-looked-at-situation-in-kashmir-very-carefully-hopes-for-a-peaceful-resolution/328814/
Articles added:  478
Article is not about current topic
Link to article:  https://theprint.in/opinion/india-wealthy-happily-donate-god-govt-loathe-helping-needy-poor/390206/
Articles added:  478
Article is not about current topic
Link to article:  https://theprint.in/politics/india-belongs-to-everyone-bjp-allies-object-to-hate-speeches-over-shaheen-bagh/359530/
Articles added:  478
Articles added:  479
Articles added:  480
Articles added:  481
Articles added:  482
Article is not about current topic
Link to article:  https://theprint.in/politics/amit-shah-adityanath-anurag-thakur-5-most-provocative-spee

Articles added:  499
Articles added:  500
Article is not about current topic
Link to article:  https://theprint.in/health/whats-behind-delhis-o2-crisis-a-critical-calculation-kejriwal-modi-govts-forgot-to-make/655179/
Articles added:  500
Article is not about current topic
Link to article:  https://theprint.in/judiciary/supreme-court-refuses-to-set-up-panel-to-probe-jamia-protests-asks-petitioners-to-move-hc/336582/
Articles added:  500
Article is not about current topic
Link to article:  https://theprint.in/india/governance/delhi-rioters-invoked-partition-drama-gadar-as-they-attacked-muslim-women-panel-says/463200/
Articles added:  500
Articles added:  501
Article is not about current topic
Link to article:  https://theprint.in/opinion/modi-govts-liberal-cloak-will-drop-if-court-considers-civil-rights-for-lgbtq-people/83881/
Articles added:  501
Articles added:  502
Articles added:  503
Link is already present in the dataframe
Link to article:  https://theprint.in/thought-shot/yamini-

Article is not about current topic
Link to article:  https://theprint.in/thought-shot/7-steps-to-dictatorship-indias-coronavirus-challenge-aaps-winning-political-formula/368846/
Articles added:  519
Link is already present in the dataframe
Link to article:  https://theprint.in/india/public-transport-resume-operations-in-kashmir-markets-stay-open-longer-in-srinagar/320855/
Articles added:  519
Articles added:  520
Article is not about current topic
Link to article:  https://theprint.in/politics/kerala-governor-arif-khan-criticises-farm-laws-reads-out-anti-centre-remarks-in-assembly/581925/
Articles added:  520
Articles added:  521
Articles added:  522
Article is not about current topic
Link to article:  https://theprint.in/politics/bhim-army-chief-azad-ready-for-electoral-plunge-in-bihar-up-eyes-dalit-muslim-obc-votes/375474/
Articles added:  522
Article is not about current topic
Link to article:  https://theprint.in/talk-point/will-amit-shahs-open-door-policy-lead-to-congressisation-o

Articles added:  544
Articles added:  545
Articles added:  546
Article is not about current topic
Link to article:  https://theprint.in/india/governance/rajnath-raises-issue-of-absentee-mps-in-parliament-as-govt-prepares-to-table-citizenship-bill/329544/
Articles added:  546
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/slump-in-markets-crude-oil-prices-coronavirus-political-turmoil-in-mp-rule-front-pages/378417/
Articles added:  546
Article is not about current topic
Link to article:  https://theprint.in/india/up-cop-accuses-5-ips-officers-of-corruption-ganging-up-with-extortionists-in-letter-to-dgp/344316/
Articles added:  546
Articles added:  547
Articles added:  548
Article is not about current topic
Link to article:  https://theprint.in/india/indian-tv-medias-chandrayaan-2-coverage-modi-pakistan-deepak-chaurasias-tinfoil-suit/288460/
Articles added:  548
Article is not about current topic
Link to article:  https://theprint.in/politics/priyanka

Articles added:  571
Articles added:  572
Article is not about current topic
Link to article:  https://theprint.in/iwitness/observing-big-changes-digging-in-to-the-best-cuisine-humming-kishore-on-assam-poll-trail/630493/
Articles added:  572
Article is not about current topic
Link to article:  https://theprint.in/india/man-arrested-for-hatching-conspiracy-to-incite-communal-riots-in-northeast-delhi/378006/
Articles added:  572
Article is not about current topic
Link to article:  https://theprint.in/opinion/family-elder-modi-not-aggressive-amit-shah-govts-face-in-covid-19-fight/391611/
Articles added:  572
Articles added:  573
Articles added:  574
Article is not about current topic
Link to article:  https://theprint.in/opinion/reclaiming-indian-pluralism-will-need-annihilation-of-congress/485212/
Articles added:  574
Articles added:  575
Article is not about current topic
Link to article:  https://theprint.in/world/all-licenses-issued-to-commercial-pilots-genuine-says-pakistan-aviation-

Articles added:  605
Article is not about current topic
Link to article:  https://theprint.in/opinion/pov/uncle-please-sit-jaggi-vasudev-needs-to-shut-up-on-things-he-has-no-clue-about/367400/
Articles added:  605
Article is not about current topic
Link to article:  https://theprint.in/politics/jobs-shaheen-bagh-focus-of-rahul-and-priyanka-gandhis-first-joint-rally-outside-up/359742/
Articles added:  605
Articles added:  606
Articles added:  607
Article is not about current topic
Link to article:  https://theprint.in/opinion/bjp-and-congress-use-tipu-sultan-for-politics-but-are-shockingly-ignorant-of-his-history/147767/
Articles added:  607
Article is not about current topic
Link to article:  https://theprint.in/diplomacy/indias-diversity-should-be-kept-as-it-is-new-french-ambassador-emmanuel-lenain/336273/
Articles added:  607
Articles added:  608
Article is not about current topic
Link to article:  https://theprint.in/opinion/plague-tested-gandhi-and-patel-leadership-style-just-like-

Articles added:  618
Article is not about current topic
Link to article:  https://theprint.in/opinion/move-over-angry-young-man-2018-was-ruled-by-angry-young-women-in-bollywood/168598/
Articles added:  618
Article is not about current topic
Link to article:  https://theprint.in/politics/article-370-modi-take-backseat-in-haryana-elections-as-jobs-caste-dominate-poll-scene/307884/
Articles added:  618
Article is not about current topic
Link to article:  https://theprint.in/opinion/not-modi-not-yogi-its-ram-versus-rest-for-bjp-in-up-2022/479085/
Articles added:  618
Article is not about current topic
Link to article:  https://theprint.in/plugged-in/republic-on-sushant-murder-times-now-super-stunner-letter-tribute-to-rahat-indori/479923/
Articles added:  618
Articles added:  619
Link is already present in the dataframe
Link to article:  https://theprint.in/plugged-in/todays-news-imrans-interviews-on-nuclear-war-kashmir-godavari-deaths-yogi-on-nrc/292091/
Articles added:  619
Article is not

Articles added:  643
Articles added:  644
Articles added:  645
Article is not about current topic
Link to article:  https://theprint.in/india/jnu-students-have-time-and-again-upheld-what-it-means-to-be-a-national-university/350320/
Articles added:  645
Link is already present in the dataframe
Link to article:  https://theprint.in/india/modi-govt-has-sent-rs-800-cr-to-jk-panchayats-but-no-projects-planned-yet-to-spend-it/286945/
Articles added:  645
Article is not about current topic
Link to article:  https://theprint.in/politics/tmc-appeals-to-left-congress-to-support-mamata-banerjee-against-divisive-bjp/584773/
Articles added:  645
Link is already present in the dataframe
Link to article:  https://theprint.in/opinion/section-377-exposes-bjp-and-congress-doublespeak-on-homosexuality/81783/
Articles added:  645
Article is not about current topic
Link to article:  https://theprint.in/politics/bjp-will-win-26-of-30-bengal-seats-that-voted-in-1st-phase-says-amit-shah/630139/
Articles added

In [None]:
#Saving the dataframe to disk as a zip-compressed pickle named "theprint_dataframe"
theprint_df.to_pickle("theprint_dataframe", compression="zip")
#Displaying the (rows, columns) shape of the dataframe that was just saved
theprint_df.shape

(3216, 6)

In [None]:
#To load the pickled dataframe; the compression is passed explicitly because the file name
#has no extension from which pandas could infer it
theprint_df = pd.read_pickle("theprint_dataframe", compression="zip")
#Displaying the shape to compare against the shape shown when saving
theprint_df.shape

(3216, 6)

Let's check if there are any rows where the article wasn't picked up by the code.

In [None]:
#Rows where any scraped field still holds the "no match" placeholder
theprint_df[theprint_df[['Title', 'Date', 'Authors', 'Article']].eq('no match').any(axis=1)]

Unnamed: 0,Title,Link,Date,Authors,Topic,Article
690,Punjab CM Amarinder Singh urges farmers to vac...,https://theprint.in/india/punjab-cm-amarinder-...,no match,no match,farm laws,Chandigarh : Punjab Chief Minister Amarinder S...


In [None]:
#Dropping every row that still holds the "no match" placeholder in a scraped field.
#The rows are selected by condition instead of by a hard-coded index label, so the cell
#can be re-run (or run on a dataframe where the row is already gone) without a KeyError.
no_match_rows = theprint_df[['Title', 'Date', 'Authors', 'Article']].eq('no match').any(axis=1)
theprint_df.drop(theprint_df.index[no_match_rows], inplace=True)

Let's select a few articles at random to check.

In [None]:
#Drawing ten articles for a manual spot check; random_state is fixed so that the same
#rows are drawn on every run and the spot check is reproducible
sample_df = theprint_df.sample(n=10, random_state=42)
sample_df

Unnamed: 0,Title,Link,Date,Authors,Topic,Article
1111,"Congress outs new page of MoD Rafale note, hin...",https://theprint.in/defence/congress-outs-new-...,"9 February, 2019 1:29 pm",Snehesh Alex Philip,rafale,A Union Defence Ministry note from 2015 has tr...
1489,"If India is in crisis, it is because good guys...",https://theprint.in/opinion/if-india-is-in-cri...,"27 August, 2019 8:59 am",Nitin Pai,article 370,"To arrest India’s decline, it is important tha..."
2190,Kashmiris are dodging internet shutdown to wat...,https://theprint.in/india/kashmiris-are-dodgin...,"12 December, 2019 11:36 am",Azaan Javaid,article 370,Dirilis Ertugrul is a historical fiction serie...
1573,"J&amp;K leaders Sajad Lone, Waheed Parra relea...",https://theprint.in/india/jk-leaders-sajad-lon...,"5 February, 2020 2:51 pm",PTI,article 370,People's Conference leader Sajad Lone and PDP'...
39,Farmer unions call for Bharat bandh on 26 March,https://theprint.in/india/farmer-unions-call-f...,"10 March, 2021 9:23 pm",PTI,farm laws,The day will mark four months of the farmers’ ...
270,Draconian Essential Commodities Act needs to g...,https://theprint.in/opinion/draconian-essentia...,"25 January, 2019 10:41 am",Ila Patnaik,farm laws,The fear of the Essential Commodities Act may ...
1020,Rafale shows HAL desperately needs competition,https://theprint.in/50-word-edit/rafale-shows-...,"3 October, 2018 6:37 pm",ThePrint Team,rafale,The 50-word Edit — ThePrint view on the most...
587,"65-yr-old ‘attempts suicide’ at Singhu border,...",https://theprint.in/india/65-yr-old-attempts-s...,"21 December, 2020 9:14 pm",Sravasti Dasgupta,farm laws,Doctors at PGIMS Rohtak say condition of Niran...
1139,Postpaid mobile services in Kashmir likely to ...,https://theprint.in/india/postpaid-mobile-serv...,"12 October, 2019 12:58 pm",PTI,article 370,J&amp;K administration officials say postpaid ...
2196,"Tweets, tantrums, half-truths – Pakistan minis...",https://theprint.in/opinion/tweets-tantrums-ha...,"3 September, 2019 11:00 am",Akshobh Giridharadas,article 370,Pakistan foreign minister Shah Mahmood Qureshi...


In [None]:
#Printing the link and then the full text of the article with index label 2196
print(*theprint_df.loc[2196, ['Link', 'Article']], sep="\n")

https://theprint.in/opinion/tweets-tantrums-half-truths-pakistan-minister-shah-mahmood-qureshis-strategy-for-kashmir/285718/
Pakistan foreign minister Shah Mahmood Qureshi has long been seen as a Kashmir hawk, but the latest crisis has exposed his inadequacies only too well.P akistan’s foreign minister Shah Mahmood Qureshi has been at the centre of several crises that have altered India-Pakistan relationship in the recent years. He was the foreign minister  during  the Mumbai 26/11 terror attacks in 2008. His second stint, this time in the Imran Khan government, has been rocked by the Pulwama attack, the Balakot airstrikes and now the Modi government’s decision to abrogate Article 370 for Jammu and Kashmir. But these moments of tensions appear to have done nothing to sharpen his diplomacy in the times of crisis. Shah Mahmood Qureshi seems to have no concrete policy to deal with the situation arising out of revocation of Article 370. He appears to be more preoccupied with bombastic and 

Next, let's check if the articles on each topic contain the relevant keywords.

In [None]:
#Initializing an empty list to collect the index labels of the articles that are to be dropped;
#it is filled by hand in later cells after each flagged article has been inspected
articles_to_drop = []

In [None]:
#Farm-laws articles that mention neither 'farm' nor 'agri' in the text or in the title
theprint_df[
    (theprint_df['Topic'] == 'farm laws')
    & ~(
        theprint_df['Article'].str.contains('farm|agri', case=False)
        | theprint_df['Title'].str.contains('farm|agri', case=False)
    )
]

Unnamed: 0,Title,Link,Date,Authors,Topic,Article
59,Deepak Chaurasia says US should learn from Ind...,https://theprint.in/plugged-in/primetime/deepa...,"8 January, 2021 9:21 am",Angana Chakrabarti,farm laws,A quick take on what prime time TV news talked...
351,Delhi’s ‘customary welcome’ &amp; govt’s ‘tric...,https://theprint.in/last-laughs/delhis-customa...,"27 November, 2020 6:04 pm",Tenzin Zompa,farm laws,"The best cartoons of the day, chosen by the ed..."
483,"90% of milk in India comprises A2 protein, say...",https://theprint.in/best-of-theprint-icymi/90-...,"30 January, 2021 3:39 pm",ThePrint Team,farm laws,"A selection of the best news reports, analysis..."
726,Supreme Court upholds power of Arvind Kejriwal...,https://theprint.in/plugged-in/supreme-court-u...,"5 July, 2018 8:18 am",Nandita Singh,farm laws,Business Class Reliance Jio gets ready for the...


In [None]:
#Printing the link and then the full text of the article with index label 726
print(*theprint_df.loc[726, ['Link', 'Article']], sep="\n")

https://theprint.in/plugged-in/supreme-court-upholds-power-of-arvind-kejriwal-govt-and-whatsapp-is-horrified/79142/
Business Class Reliance Jio gets ready for the next round of disruption in the telecom sector. The company informed the government that it will be launching voice over WiFi services very soon, thus enabling calls in areas with poor mobile signals,  reports   The Economic Times . Remember the fanfare with which Tata Nano was launched in 2008? India’s ‘magic vehicle’ is nearing its end with just one unit produced last month,  reports   The Economic Times . The entry-level car sold just three units in June in the domestic market. Point of View The Supreme Court Wednesday restored the authority of the elected government in Delhi in administrative matters. The Indian Express, in its  editorial,  writes, “SC upholds a basic democratic principle: Elected governments have the upper hand.” The Hindu added in its editorial, “The SC clarifies an elected government cannot be undermin

In [None]:
#Marking article 726 for removal: neither its title nor its text matched 'farm|agri' in the check above
articles_to_drop.append(726)

In [None]:
#Number of articles marked for removal so far
#NOTE(review): the saved output is 2 although only one append is visible above - the other
#index was presumably appended in a cell that is no longer in the notebook; confirm
len(articles_to_drop)

2

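Let's also drop all articles from the 'primetime' and 'last-laughs' sections: as the rows flagged above show, these are roundups of prime-time TV news and of the day's cartoons rather than articles about a single topic.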

In [None]:
#Index labels of the articles whose link contains 'primetime', and how many there are
primetime_indices = theprint_df[theprint_df['Link'].str.contains('primetime')].index
len(primetime_indices)

26

In [None]:
#Index labels of the articles whose link contains 'last-laugh', and how many there are
laughs_indices = theprint_df[theprint_df['Link'].str.contains('last-laugh')].index
len(laughs_indices)

104

In [None]:
#Reporting the article count before the rows are dropped
print("Number of articles before dropping: ", len(theprint_df))

Number of articles before dropping:  3187


In [None]:
#Dropping the hand-picked articles and the 'primetime' articles.
#errors="ignore" skips labels that are no longer in the index, so the cell does not raise a
#KeyError when a label appears in both collections or when the cell is run a second time.
theprint_df.drop(articles_to_drop, inplace=True, errors="ignore")
theprint_df.drop(primetime_indices, inplace=True, errors="ignore")

In [None]:
#Removing the 'last-laugh' articles and reporting how many articles remain
theprint_df.drop(index=laughs_indices, inplace=True)
print("Number of articles after dropping: ", len(theprint_df))

Number of articles after dropping:  3083


In [None]:
#Rafale articles that mention neither 'rafale' nor 'dassault' in the text or in the title
theprint_df[
    (theprint_df['Topic'] == 'rafale')
    & ~(
        theprint_df['Article'].str.contains('rafale|dassault', case=False)
        | theprint_df['Title'].str.contains('rafale|dassault', case=False)
    )
]

Unnamed: 0,Title,Link,Date,Authors,Topic,Article
787,"To keep IAF flying, India will pull aircraft f...",https://theprint.in/india/governance/to-keep-i...,"18 October, 2018 9:37 am",Sujan Dutta,rafale,India is the only air force in the world still...
882,"Clean energy, tech in focus as France looks be...",https://theprint.in/defence/clean-energy-tech-...,"29 March, 2021 11:03 am",Snehesh Alex Philip,rafale,France is a significant source of FDI in India...
981,Rahul Gandhi must decide if Congress is a Euro...,https://theprint.in/opinion/rahul-gandhi-must-...,"23 May, 2019 3:44 pm",Abhijit Iyer-Mitra,rafale,Safe to say national security plank will alway...


In [None]:
#Re-initializing the list that collects the index labels of the articles to be dropped:
#the labels gathered earlier were already passed to drop() above, so collection starts afresh
articles_to_drop = []

In [None]:
#Printing the link and then the full text of the article with index label 981
print(*theprint_df.loc[981, ['Link', 'Article']], sep="\n")

https://theprint.in/opinion/rahul-gandhi-must-decide-if-congress-is-a-european-ngo-or-a-party-in-nationalist-india/239546/
Safe to say national security plank will always favour the BJP, unless Rahul Gandhi replaces his national security advisers.I ndian elections and national security have a seemingly unpredictable relationship. The defeat in the 1962 war with China and the victory in the 1971 war happened immediately after elections earlier in the year and can’t be used as gauges. However, the Sri Lanka fiasco and Bofors took a heavy toll on Rajiv Gandhi,  wiping out  his government in the 1989 Lok Sabha elections. The victory in the Kargil War propelled Atal Bihari Vajpayee to a  victory  in 1999. The 26/11 Mumbai attacks were undeniably a massive intelligence and security failure, and yet  in state elections  a few days later, the Congress trounced the BJP. Confused? So am I. While overarching trends are difficult to make out, there are some allegories that bear repeating. Subraman

In [None]:
#Marking article 981 for removal: neither its title nor its text matched 'rafale|dassault' in the check above
articles_to_drop.append(981)

In [None]:
#Number of articles marked for removal since the list was re-initialized
#NOTE(review): the saved output is 3 although only one append is visible since the reset -
#the remaining indices were presumably appended in cells that are no longer present; confirm
len(articles_to_drop)

3

In [None]:
#Checking the articles on article 370: rows that mention none of '370', 'kashmir' or 'J&K'
#in the text or in the title.
#The scraped text is HTML-escaped (titles show up as "J&amp;K ..."), so the pattern accepts
#both 'j&k' and 'j&amp;k'; a bare 'j&k' never matches the escaped form and wrongly flags
#articles that do mention J&K.
theprint_df[
    (theprint_df['Topic'] == 'article 370')
    & ~theprint_df['Article'].str.contains('370|kashmir|j&(?:amp;)?k', case=False)
    & ~theprint_df['Title'].str.contains('370|kashmir|j&(?:amp;)?k', case=False)
]

Unnamed: 0,Title,Link,Date,Authors,Topic,Article
1422,"Hair rebellion — from Kautilya, Omar Abdullah ...",https://theprint.in/opinion/pov/hair-rebellion...,"26 April, 2020 1:34 pm",Shubhangi Misra,article 370,"To deal with boredom, anxiety and restlessness..."
1599,Modi govt needs to offer clarity on why it red...,https://theprint.in/50-word-edit/modi-govt-nee...,"5 August, 2019 8:12 pm",ThePrint Team,article 370,"ThePrint view on the most important issues, in..."
1838,Ram Mandir can’t wait for Covid vaccine. Modi ...,https://theprint.in/opinion/ram-mandir-cant-wa...,"28 July, 2020 8:52 am",Ruhi Tewari,article 370,"For BJP, Ayodhya issue is the most significant..."
1892,NIA files chargesheet against six people for h...,https://theprint.in/india/nia-files-chargeshee...,"27 July, 2020 9:10 pm",PTI,article 370,The accused have been charged with relevant pr...
2206,"3 militants, 1 civilian killed as search opera...",https://theprint.in/india/3-militants-1-civili...,"17 September, 2020 12:10 pm",PTI,article 370,"Two CRPF personnel, including an officer, sust..."


In [None]:
#Printing the link and then the full text of the article with index label 2206
print(*theprint_df.loc[2206, ['Link', 'Article']], sep="\n")

https://theprint.in/india/3-militants-1-civilian-killed-as-search-operation-turns-into-encounter-in-jks-batamaloo/504547/
Two CRPF personnel, including an officer, sustained injuries after militants opened fire during the search operation in Firdousabad locality. Three militants and a civilian woman were killed, while two CRPF personnel, including an officer, sustained injuries in a predawn gunbattle on Thursday in Batamaloo area of the city, officials said. Security forces launched a cordon and search operation in Firdousabad locality in Batamaloo area at around 2.30 am following information about the presence of militants there, the officials said. They said the search operation turned into an encounter after militants opened fire on security forces. One civilian identified as Kaunsar Riyaz was killed in the firing, while two CRPF personnel, including an officer, were injured, the officials said. The injured personnel were taken to hospital and the operation was still in progress, th

In [None]:
#Marking article 2206 for removal after the manual inspection of its text above
articles_to_drop.append(2206)

In [None]:
#Number of articles marked for removal since the list was re-initialized
#NOTE(review): the saved output is 7 although only two appends are visible since the reset -
#the remaining indices were presumably appended in cells that are no longer present; confirm
len(articles_to_drop)

7

In [None]:
#Sabarimala articles that mention neither 'sabarimala' nor 'ayyappa' in the text or in the title
theprint_df[
    (theprint_df['Topic'] == 'sabarimala')
    & ~(
        theprint_df['Article'].str.contains('sabarimala|ayyappa', case=False)
        | theprint_df['Title'].str.contains('sabarimala|ayyappa', case=False)
    )
]

Unnamed: 0,Title,Link,Date,Authors,Topic,Article
2462,"Adultery may not be cause of unhappy marriage,...",https://theprint.in/india/governance/adultery-...,"28 September, 2018 1:26 pm",Ritika Jain,sabarimala,The judgment delivered by Supreme Court was re...


In [None]:
#Printing the link and then the full text of the article with index label 2462
print(*theprint_df.loc[2462, ['Link', 'Article']], sep="\n")

https://theprint.in/india/governance/adultery-may-not-be-cause-of-unhappy-marriage-can-be-result-quotes-from-scs-497-ruling/126170/
The judgment delivered by Supreme Court was replete with observations that vindicated rights patriarchal societies have denied women for decades.  As the Supreme Court struck down Section 497 of the IPC, a sexist law on adultery, Thursday, Chief Justice of India Dipak Misra invoked an evocative analogy to emphasise the equality of women. “…The essentiality of the rights of women gets the real requisite space in the living room of individual dignity rather than… in an annexe to the main building,” he wrote, penning the judgment for himself and Justice A.M. Khanwilkar. A five-judge Constitution bench was unanimous in striking down the colonial-era law, which absolved men of guilt if they courted a married woman with her husband’s permission. Another controversial provision kept women out of the purview of punishment, with only men held accountable. In their 

In [None]:
#Marking article 2462 for removal: neither its title nor its text matched 'sabarimala|ayyappa' in the check above
articles_to_drop.append(2462)

In [None]:
#Section-377 articles that mention none of '377', 'queer', 'lgbt' or 'gay' in the text or in the title
theprint_df[
    (theprint_df['Topic'] == 'section 377')
    & ~(
        theprint_df['Article'].str.contains('377|queer|lgbt|gay', case=False)
        | theprint_df['Title'].str.contains('377|queer|lgbt|gay', case=False)
    )
]

Unnamed: 0,Title,Link,Date,Authors,Topic,Article
2532,"Thank you, Supreme Court",https://theprint.in/50-word-edit/thank-you-sup...,"6 September, 2018 1:14 pm",ThePrint Team,section 377,The 50-word Edit — ThePrint view on the most...


In [None]:
#Printing the link and then the full text of the article with index label 2532
print(*theprint_df.loc[2532, ['Link', 'Article']], sep="\n")

https://theprint.in/50-word-edit/thank-you-supreme-court/112826/
The  50-word Edit  — ThePrint view on the most important issues, instantly. The Supreme Court has redeemed itself by decriminalising homosexuality. This is a historic win for civil rights and equality, especially because it addresses a longstanding grievance of a minority group. Politicians, however, should be filled with remorse for shirking their responsibility and allowing the judiciary to claim all credit.


In [None]:
#Checking the articles on CAA: rows that mention none of 'caa', 'nrc', 'citizen' or 'shaheen'
#in the text or in the title.
#The topic label is stored in lowercase ('caa'), like every other topic in the dataframe;
#comparing against 'CAA' selects no rows at all, which makes the check pass vacuously.
theprint_df[
    (theprint_df['Topic'] == 'caa')
    & ~theprint_df['Article'].str.contains('caa|nrc|citizen|shaheen', case=False)
    & ~theprint_df['Title'].str.contains('caa|nrc|citizen|shaheen', case=False)
]

Unnamed: 0,Title,Link,Date,Authors,Topic,Article


In [None]:
#Removing the hand-picked articles and reporting the article count before and after
print("Number of articles before dropping: ", len(theprint_df))
theprint_df.drop(index=articles_to_drop, inplace=True)
print("Number of articles after dropping: ", len(theprint_df))

Number of articles before dropping:  3083
Number of articles after dropping:  3075


In [None]:
#Saving the cleaned dataframe to disk as a zip-compressed pickle
#NOTE(review): this writes to the same "theprint_dataframe" path as the save made before the
#clean-up, so the unfiltered dataframe on disk is overwritten - confirm that this is intended
theprint_df.to_pickle("theprint_dataframe", compression="zip")
#Displaying the (rows, columns) shape of the dataframe that was just saved
theprint_df.shape

(3075, 6)

In [None]:
#To load the pickled dataframe; the compression is passed explicitly because the file name
#has no extension from which pandas could infer it
theprint_df = pd.read_pickle("theprint_dataframe", compression="zip")
#Displaying the shape to compare against the shape shown when saving
theprint_df.shape

(3075, 6)

In [None]:
#Shape (rows, columns) of the subset of articles stored under each topic label
for label, topic in [("Farm Laws", 'farm laws'), ("Rafale", 'rafale'), ("Article 370", 'article 370'),
                     ("Sabarimala", 'sabarimala'), ("Section 377", 'section 377'), ("CAA", 'caa')]:
    print(label, theprint_df[theprint_df['Topic']==topic].shape)

Farm Laws (701, 6)
Rafale (354, 6)
Article 370 (1253, 6)
Sabarimala (100, 6)
Section 377 (43, 6)
CAA (624, 6)


In [None]:
#The eight most frequent values of the Authors column, with their article counts
theprint_df['Authors'].value_counts().head(8)

PTI                    828
Azaan Javaid           252
ThePrint Team          159
Snehesh Alex Philip     87
Nayanima Basu           83
Ananya Bhardwaj         70
Fatima Khan             62
ANI                     56
Name: Authors, dtype: int64

In [None]:
#Shape of the subset of articles whose Authors field names a news agency (PTI, ANI, IANS, ...).
#The acronyms are wrapped in word boundaries because, with case=False, a bare 'ANI' or 'UTI'
#also matches inside personal names (e.g. "Nayanima", "Shruti") and inflates the count.
theprint_df[theprint_df['Authors'].str.contains(r'Press|News|\b(?:PTI|ANI|UTI|IANS)\b', case=False)].shape

(1019, 6)