# Import Necessary Libraries

In [1]:
import bs4
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re

# Read .csv file and Create Resultant DataFrame

In [2]:
df = pd.read_csv('Problems.csv')
final_df = pd.DataFrame({'Problem Number':df['PROBLEM NO']})
df = df.set_index('SR.NO')
final_df.head(10)

Unnamed: 0,Problem Number
0,4A
1,1A
2,71A
3,158A
4,118A
5,231A
6,50A
7,282A
8,112A
9,96A


Iterate through the links to add problem statement definitions to an empty list.

In [3]:
links = df['LINK']
problemStatements = []
for link in links:
    page = requests.get(f"{link}")
    soup = BeautifulSoup(page.content,"lxml")
    match = soup.find(class_ = 'problem-statement')
    for x in match:
        if str(x)[:8] == '<div><p>':
            result = re.sub(r"<p>", "", str(x))
            result = re.sub(r"</p>", "", str(result))
            result = re.sub(r"<div>", "", str(result))
            result = re.sub(r"</div>", "", str(result))
            problemStatements.append(result)

Add the problem statement list as a column to resultant dataframe.

In [4]:
final_df = pd.concat([final_df,pd.Series(problemStatements)],axis = 1)
final_df.columns = ['Problem Number', 'Problem Statement']

In [5]:
final_df.head()

Unnamed: 0,Problem Number,Problem Statement
0,4A,One hot summer day Pete and his friend Billy d...
1,1A,Theatre Square in the capital city of Berland ...
2,71A,"Sometimes some words like ""<span class=""tex-fo..."
3,158A,"""Contestant who earns a score equal to or grea..."
4,118A,Petya started to attend programming lessons. O...


Similarly, do the same for input and output specifications of the problems.

In [6]:
links = df['LINK']
inputSpecifications = []
for link in links:
    page = requests.get(f"{link}")
    soup = BeautifulSoup(page.content,"lxml")
    match = soup.find(class_ = 'input-specification').get_text()
    inputSpecifications.append(str(match)[5:])

In [7]:
final_df = pd.concat([final_df,pd.Series(inputSpecifications)],axis = 1)
final_df.columns = ['Problem Number', 'Problem Statement', 'Input Specifications']

In [8]:
final_df.head()

Unnamed: 0,Problem Number,Problem Statement,Input Specifications
0,4A,One hot summer day Pete and his friend Billy d...,The first (and the only) input line contains i...
1,1A,Theatre Square in the capital city of Berland ...,The input contains three positive integer numb...
2,71A,"Sometimes some words like ""<span class=""tex-fo...",The first line contains an integer n (1 ≤ n ≤ ...
3,158A,"""Contestant who earns a score equal to or grea...",The first line of the input contains two integ...
4,118A,Petya started to attend programming lessons. O...,The first line represents input string of Pety...


In [9]:
links = df['LINK']
outputSpecifications = []
for link in links:
    page = requests.get(f"{link}")
    soup = BeautifulSoup(page.content,"lxml")
    match = soup.find(class_ = 'output-specification').get_text()
    outputSpecifications.append(str(match)[6:])

In [10]:
final_df = pd.concat([final_df,pd.Series(outputSpecifications)],axis = 1)
final_df.columns = ['Problem Number', 'Problem Statement', 'Input Specifications','Output Specifications']

In [11]:
final_df.head()

Unnamed: 0,Problem Number,Problem Statement,Input Specifications,Output Specifications
0,4A,One hot summer day Pete and his friend Billy d...,The first (and the only) input line contains i...,"Print YES, if the boys can divide the watermel..."
1,1A,Theatre Square in the capital city of Berland ...,The input contains three positive integer numb...,Write the needed number of flagstones.
2,71A,"Sometimes some words like ""<span class=""tex-fo...",The first line contains an integer n (1 ≤ n ≤ ...,Print n lines. The i-th line should contain th...
3,158A,"""Contestant who earns a score equal to or grea...",The first line of the input contains two integ...,Output the number of participants who advance ...
4,118A,Petya started to attend programming lessons. O...,The first line represents input string of Pety...,Print the resulting string. It is guaranteed t...


Write resultant dataframe to a .csv file.

In [12]:
final_df.to_csv('Problem Statements.csv')