<a href="https://colab.research.google.com/github/navidadkhah/Fine-Tuning-LLMs/blob/main/Dataset/Create_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing requirement libraries

In [2]:
!pip install mutmut
!pip install astor
!pip install libcst

Collecting mutmut
  Downloading mutmut-2.5.1.tar.gz (50 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/50.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.5/50.5 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pony (from mutmut)
  Downloading pony-0.7.19-py3-none-any.whl.metadata (2.8 kB)
Collecting junit-xml<2,>=1.8 (from mutmut)
  Downloading junit_xml-1.9-py2.py3-none-any.whl.metadata (3.2 kB)
Downloading junit_xml-1.9-py2.py3-none-any.whl (7.1 kB)
Downloading pony-0.7.19-py3-none-any.whl (317 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.3/317.3 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: mutmut
  Building wheel for mutmut (setup.py) ... [?25l[?25hdone
  Created wheel for mutmut: filename=mutmut-2.5.1-py2.py3-none-any.whl size=31323 sha256=9045b81443c3161d76

In [10]:
import numpy as np
import pandas as pd
import csv
import json
from collections.abc import Iterator
import ast    # To convert code to AST
import astor  # To convert AST back to code
import random  # To select mutations randomly
import libcst as cst
import libcst.matchers as m


# Reading Dataset

In [5]:
# Reading not completed dataset
path = '/content/drive/MyDrive/Bachelor\'s project/python_codes.csv'
df = pd.read_csv(path)
print(f"Number of Dataframe rows : {df.shape}")
df.head()

Number of Dataframe rows : (23187, 2)


Unnamed: 0,language,source
0,Python 3,"n, m = map(int, input().split())\na = []\nfor ..."
1,Python 3,"n, m = map(int, input().split())\na = []\nfor ..."
2,Python 3,#In the name of Allah\n\nfrom sys import stdin...
3,Python 3,"s = input()\nl, *v = (int(x) for x in input()...."
4,Python 3,"s = input()\nl, *v = (int(x) for x in input()...."


In [6]:
# Droping language column
df = df.drop('language', axis=1)
print(df.shape)
df.head()

(23187, 1)


Unnamed: 0,source
0,"n, m = map(int, input().split())\na = []\nfor ..."
1,"n, m = map(int, input().split())\na = []\nfor ..."
2,#In the name of Allah\n\nfrom sys import stdin...
3,"s = input()\nl, *v = (int(x) for x in input()...."
4,"s = input()\nl, *v = (int(x) for x in input()...."


In [7]:
# Removing codes cotain comments
def has_comments(code):
    return '#' in code

df = df[~df['source'].apply(has_comments)]
print(df.shape)
df = df.reset_index(drop=True)
df.head()

(17743, 1)


Unnamed: 0,source
0,"n, m = map(int, input().split())\na = []\nfor ..."
1,"n, m = map(int, input().split())\na = []\nfor ..."
2,"s = input()\nl, *v = (int(x) for x in input()...."
3,"s = input()\nl, *v = (int(x) for x in input()...."
4,"n = int(input())\na = list(map(int, input().sp..."


In [155]:
df['source'][0].split("\n")

['n, m = map(int, input().split())',
 'a = []',
 'for i in range(n + 1):',
 '    a.append([0])',
 'for i in range(m):',
 '    b, c = map(int, input().split())',
 '    a[b].append(c)',
 '    a[c].append(b)',
 'mi = 10 ** 9',
 'for i in range(1, n + 1):',
 '    for j in range(i + 1, n + 1):',
 '        for k in range(j + 1, n + 1):',
 '            if (j in a[i]) and (k in a[i]) and (k in a[j]):',
 '                mi = min(mi, len(a[i]) + len(a[j]) + len(a[k]) - 9)',
 'if mi != 10 ** 9:',
 '    print(mi)',
 'else:',
 '    print(-1)']

# Mutanting code
In this section, we use mutation technique code using the CST library. Changing operations in the code and randomly replace with CST operations.
## Why we are using CST?
We are using CST instead of AST because we want to save the format of the structure too and CST does it for us.
<br>
In the output, we expect a mutant code and a line to tell us which line has changed.

In [None]:
# Define a function to mutate and print code
def mutate_code(code_str):
    # Parse the string into an AST
    tree = ast.parse(code_str)

    # List of possible binary operations for mutation
    operations = [ast.Add, ast.Sub, ast.Mult, ast.Div, ast.Mod, ast.Pow,
                  ast.FloorDiv, ast.LShift, ast.RShift, ast.BitOr, ast.BitXor,
                  ast.BitAnd, ast.MatMult]

    # Split the original code into lines
    original_lines = code_str.strip().split('\n')


    # Apply a random mutation to the AST
    for node in ast.walk(tree):
        if isinstance(node, ast.BinOp):
            # Randomly choose a new operator
            new_op = random.choice(operations)()
            node.op = new_op
            break

    # Convert the mutated AST back to a string
    mutated_code = astor.to_source(tree).strip()
    mutated_lines = mutated_code.split('\n')

    # Print the mutated code
    print(mutated_code)


    # Identify which lines have changed
    changed_lines = []
    line = 1;
    for original, mutated in zip(original_lines, mutated_lines):
        if original != mutated:
          print(f"Line {line} has changed")
          changed_lines.append(mutated)
        line += 1

    # Output changed lines
    if changed_lines:
        print("\n--- Changed Lines ---")
        for line in changed_lines:
            print(line)
    else:
        print("No changes detected.")

    # Reset the tree by re-parsing the original code
    tree = ast.parse(code_str)

In [140]:
# Example usage: Pass code as a string

# for index in range(len(df)):

code = df['source'][2].split('\n')
filtered_code_list = [line for line in code if line.strip()]

formatted_code = '\n'.join(filtered_code_list)

cpp_code_str = f"""
{formatted_code}
"""

# print(cpp_code_str)

test = "(a + b) * c"

mutate_code(test)

(a + b) * c
No changes detected.


In [141]:
import ast
import random

def mutate_code(source_code):
    tree = ast.parse(source_code)

    operations = [ast.Add, ast.Sub, ast.Mult, ast.Div, ast.Mod, ast.Pow,
                  ast.FloorDiv, ast.LShift, ast.RShift, ast.BitOr, ast.BitXor,
                  ast.BitAnd, ast.MatMult]

    # mutation_type = random.choice(["value", "decision", "statement", "operation"])
    mutation_type = "operation"

    print(mutation_type)
    if mutation_type == "value":
        # Value Mutation: Change integer constants
        for node in ast.walk(tree):
            if isinstance(node, ast.Constant) and isinstance(node.value, int):
                node.value = random.randint(1, 10000)
                break  # Apply mutation to only one line

    elif mutation_type == "decision":
        # Decision Mutation: Change comparison operators
        for node in ast.walk(tree):
            if isinstance(node, ast.Compare):
                node.ops[0] = random.choice([ast.Gt(), ast.Lt(), ast.Eq(), ast.GtE(), ast.LtE()])
                break  # Apply mutation to only one line

    elif mutation_type == "statement":
        # Statement Mutation: Change the assigned variable name
        for node in ast.walk(tree):
            if isinstance(node, ast.Assign) and isinstance(node.targets[0], ast.Name):
                node.targets[0].id = random.choice(['x', 'y', 'z'])
                break  # Apply mutation to only one line

    elif mutation_type == "operation":
        # Changing Operations: Change the binary operation
        for node in ast.walk(tree):
           print(node)
        for node in ast.walk(tree):
            if isinstance(node, ast.BinOp):
                new_op = random.choice(operations)()
                print(new_op)
                node.op = new_op
                break  # Apply mutation to only one line

    # Convert the AST back into source code
    mutated_code = astor.to_source(tree).strip()
    mutated_lines = mutated_code.split('\n')

    # Print the mutated code
    # print(mutated_code)

    return ast.unparse(tree)

# Example code to mutate
source_code = '''
(a + b) + d
'''

# Mutate the code
mutated_code = mutate_code(source_code)
print(mutated_code)


operation
<ast.Module object at 0x7850f36cc970>
<ast.Expr object at 0x7850f36cf340>
<ast.BinOp object at 0x7850f36cfb20>
<ast.BinOp object at 0x7850f36cfa30>
<ast.Add object at 0x78513e7b0490>
<ast.Name object at 0x7850f36cc9a0>
<ast.Name object at 0x7850f36ce590>
<ast.Add object at 0x78513e7b0490>
<ast.Name object at 0x7850f36cd300>
<ast.Load object at 0x78513e7b0250>
<ast.Load object at 0x78513e7b0250>
<ast.Load object at 0x78513e7b0250>
<ast.BitOr object at 0x7850f351fa30>
a + b | d


In [186]:
import random
import libcst as cst

class OperationChanger(cst.CSTTransformer):
    operations = [
        cst.Add(), cst.Subtract(), cst.Multiply(), cst.Divide(),
        cst.Modulo(), cst.Power(), cst.FloorDivide(),
        cst.LeftShift(), cst.RightShift(),
        cst.BitOr(), cst.BitXor(), cst.BitAnd(),
        cst.MatrixMultiply()
    ]

    def __init__(self):
        super().__init__()
        self.changed = False  # Flag to track if a change has been made

    def leave_BinaryOperation(self, original_node, updated_node):
        # Change '+' to another operation if the operation is a '+' and no changes have been made yet
        if not self.changed and isinstance(updated_node.operator, cst.Add):
            new_op = random.choice(self.operations)
            self.changed = True  # Set flag to True after changing one operation
            return updated_node.with_changes(operator=new_op)
        return updated_node

# Original code with multiple lines and parentheses
code = """
n, m = map(int, input().split())
a = []
for i in range(n + 1):
    a.append([0])
for i in range(m):
    b, c = map(int, input().split())
    a[b].append(c)
    a[c].append(b)
mi = 10 ** 9
for i in range(1, n + 1):
    for j in range(i + 1, n + 1):
        for k in range(j + 1, n + 1):
            if (j in a[i]) and (k in a[i]) and (k in a[j]):
                mi = min(mi, len(a[i]) + len(a[j]) + len(a[k]) - 9)
if mi != 10 ** 9:
    print(mi)
else:
    print(-1)
"""


# code = df['source'][2].split('\n')
# filtered_code_list = [line for line in code if line.strip()]

# formatted_code = '\n'.join(filtered_code_list)

# code = f"""
# {formatted_code}
# """
# Parse the code into CST
module = cst.parse_module(code)

# Apply the transformer to change one '+' to another operation
modified_tree = module.visit(OperationChanger())

# Print the modified code, parentheses preserved
print(modified_tree.code)



n, m = map(int, input().split())
a = []
for i in range(n ** 1):
    a.append([0])
for i in range(m):
    b, c = map(int, input().split())
    a[b].append(c)
    a[c].append(b)
mi = 10 ** 9
for i in range(1, n + 1):
    for j in range(i + 1, n + 1):
        for k in range(j + 1, n + 1):
            if (j in a[i]) and (k in a[i]) and (k in a[j]):
                mi = min(mi, len(a[i]) + len(a[j]) + len(a[k]) - 9)
if mi != 10 ** 9:
    print(mi)
else:
    print(-1)



In [177]:
code = df['source'][0].split('\n')
filtered_code_list = [line for line in code if line.strip()]

formatted_code = '\n'.join(filtered_code_list)

code = f"""
{formatted_code}
"""

print(code)


n, m = map(int, input().split())
a = []
for i in range(n + 1):
    a.append([0])
for i in range(m):
    b, c = map(int, input().split())
    a[b].append(c)
    a[c].append(b)
mi = 10 ** 9
for i in range(1, n + 1):
    for j in range(i + 1, n + 1):
        for k in range(j + 1, n + 1):
            if (j in a[i]) and (k in a[i]) and (k in a[j]):
                mi = min(mi, len(a[i]) + len(a[j]) + len(a[k]) - 9)
if mi != 10 ** 9:
    print(mi)
else:
    print(-1)



# Last dataset

  

In [None]:
# In this cell, we get datas from below url. datas contain number of available contests
# Usefull variables:
#     last_contest_id : keeps id of the last contest
#     number_of_contests : keeps number of FINISHED contests
#     contest_IDs : keeps the ID contests


url = "https://codeforces.com/api/contest.list"

response = requests.get(url)
data = response.json()

if data['status'] == "OK":
    contests = data['result']

    # Filter and print only Python submissions
    last_contest_id = 0
    not_started_yet = 0
    contest_IDs = []
    for contest in contests:
      if contest['phase'] == 'BEFORE':
        not_started_yet += 1
      else:
        if last_contest_id == 0:
          last_contest_id = contest['id']
        contest_IDs.append(contest['id'])


    number_of_contests = len(contests) - not_started_yet

    contest_IDs.sort()
    print(f"Number of available contests: {number_of_contests}\nID of the last contest: {last_contest_id}")

else:
    print("Error:", data['comment'])

NameError: name 'requests' is not defined

In [None]:
# Now, we use this Function to get the best users from contests
# Usefull variables
#     rated_list_handle : keeps all the best handles

def get_contest_users(contest_id, count, rated_list_handle):

    url = f"https://codeforces.com/api/contest.standings?contestId={contest_id}&from=1&count={count}"

    # Make the GET request to Codeforces API
    response = requests.get(url)
    data = response.json()

    # Check if the API call was successful
    if data['status'] == 'OK':
        standings = data['result']['rows']

        # Extract and print user handles
        for row in standings:
            handle = row['party']['members'][0]['handle']
            rank = row['rank']
            if handle not in rated_list_handle:
              rated_list_handle.append(handle)
            # print(f"Rank: {rank}, User: {handle}")
    else:
        print("Error:", data['comment'])

# Example: Get the first 10 users from contest 566
number_of_rated = 5
rated_list_handle = []
for index in range(len(contest_IDs), 0, -1):
    get_contest_users(index, number_of_rated, rated_list_handle)
print(f"Number of top rated contestor of all time: {len(rated_list_handle)}")
print(f"Some example:\n{rated_list_handle[0:10]}")

Error: contestId: Contest with id 1908 not found
Error: contestId: Contest with id 1897 not found
Error: contestId: Contest with id 1892 not found
Error: contestId: Contest with id 1880 not found
Error: contestId: Contest with id 1871 not found
Error: contestId: Contest with id 1865 not found
Error: contestId: Contest with id 1803 not found
Error: contestId: Contest with id 1757 not found
Error: contestId: Contest with id 1756 not found
Error: contestId: Contest with id 1745 not found
Error: contestId: Contest with id 1727 not found
Error: contestId: Contest with id 1683 not found
Error: contestId: Contest with id 1664 not found
Error: contestId: Contest with id 1655 not found
Error: contestId: Contest with id 1653 not found
Error: contestId: Contest with id 1645 not found
Error: contestId: Contest with id 1643 not found
Error: contestId: Contest with id 1640 not found
Error: contestId: Contest with id 1636 not found
Error: contestId: Contest with id 1597 not found
Error: contestId: Co

In [None]:
import json

# Function to add a submission
def add_submission(contest_id, submission_id, problem_name, user_handle):
    submission = {
        "contest_id": contest_id,
        "submission_id": submission_id,
        "problem_name": problem_name,
        "user_handle": user_handle
    }
    submissions_data["submissions"].append(submission)

def find_python_submission(handle):
    # API URL to get submissions for the user
    url = f"https://codeforces.com/api/user.status?handle={handle}&from=1&count=10000"

    # Make the GET request to Codeforces API
    response = requests.get(url)
    data = response.json()

    # Check if the API call was successful
    if data['status'] == 'OK':
        submissions = data['result']

        # Filter and print only Python submissions
        for submission in submissions:
            if 'Python' in submission['programmingLanguage'] and submission['verdict'] == "OK":  # Check if the language is Python
              try:
                submission_id = submission['id']
                contest_id = submission['contestId']
                problem_name = submission['problem']['name']
                add_submission(contest_id, submission_id, problem_name, handle)
              except:
                print("An exception occurred")
    else:
        print("Error:", data['comment'])


submissions_data = {
    "submissions": []
}

for index in rated_list_handle:
   find_python_submission(index)

with open('submissions.json', 'w') as json_file:
    json.dump(submissions_data, json_file, indent=4)

# Print the JSON structure
print(json.dumps(submissions_data, indent=4))


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
        {
            "contest_id": 282,
            "submission_id": 11146785,
            "problem_name": "XOR and OR",
            "user_handle": "VinyleEm"
        },
        {
            "contest_id": 322,
            "submission_id": 11146055,
            "problem_name": "Ciel and Flowers",
            "user_handle": "VinyleEm"
        },
        {
            "contest_id": 453,
            "submission_id": 11145891,
            "problem_name": "Little Pony and Expected Maximum",
            "user_handle": "VinyleEm"
        },
        {
            "contest_id": 347,
            "submission_id": 11145534,
            "problem_name": "Fixed Points",
            "user_handle": "VinyleEm"
        },
        {
            "contest_id": 520,
            "submission_id": 10130454,
            "problem_name": "DNA Alignment",
            "user_handle": "VinyleEm"
        },
        {
            "contest_id": 520,
      

In [None]:
with open('submissions.json', 'w') as json_file:
    json.dump(submissions_data, json_file, indent=4)

# Print the JSON structure
print(len(json.dumps(submissions_data, indent=4)))


4753208


In [None]:
with open('submissions_data.json', 'w') as f:
    json.dump(submissions_data, f)

NameError: name 'submissions' is not defined

In [None]:
path = '/content/drive/MyDrive/Bachelor\'s project/submissions.json'

with open(path, 'r') as file:
    data = json.load(file)
data = data['submissions']
len(data)

26572

In [None]:
import requests
import time

from bs4 import BeautifulSoup

# URL of the webpage you want to scrape
url = 'https://codeforces.com/contest/343/submission/4460849'

# Fetch the webpage content
response = requests.get(url)

# Parse the content with BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')


# Print the title of the page
print(soup.prettify())

# Example: Find all the links on the page
for link in soup.find_all('div'):
    print(link.get('href'))


<!DOCTYPE html>
<html lang="en-US">
 <head>
  <title>
   Just a moment...
  </title>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="IE=Edge" http-equiv="X-UA-Compatible"/>
  <meta content="noindex,nofollow" name="robots"/>
  <meta content="width=device-width,initial-scale=1" name="viewport"/>
  <style>
   *{box-sizing:border-box;margin:0;padding:0}html{line-height:1.15;-webkit-text-size-adjust:100%;color:#313131;font-family:system-ui,-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol,Noto Color Emoji}body{display:flex;flex-direction:column;height:100vh;min-height:100vh}.main-content{margin:8rem auto;max-width:60rem;padding-left:1.5rem}@media (width <= 720px){.main-content{margin-top:4rem}}.h2{font-size:1.5rem;font-weight:500;line-height:2.25rem}@media (width <= 720px){.h2{font-size:1.25rem;line-height:1.5rem}}#challenge-error-text{background-image:url(data:i

In [None]:
import os
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
from webdriver_manager.firefox import GeckoDriverManager

# Set the display environment variable for xvfb
os.environ['DISPLAY'] = ':1'

# Set up Firefox options for headless mode
firefox_options = Options()
firefox_options.add_argument("--headless")
firefox_options.add_argument("--no-sandbox")
firefox_options.add_argument("--disable-dev-shm-usage")
# Initialize the Firefox WebDriver with GeckoDriver
driver = webdriver.Firefox(service=Service(GeckoDriverManager().install()), options=firefox_options)


data[0]['contest_id']

for index in range(len(data)):
  contest_id = data[index]['contest_id']
  submission_id = data[index]['submission_id']
  problem_name = data[index]['problem_name']



WebDriverException: Message: Process unexpectedly closed with status 1


In [None]:
https://codeforces.com/contest/2019/submission/284768286

In [None]:
from selenium.common.exceptions import WebDriverException
driver = webdriver.Firefox(service=Service(GeckoDriverManager().install()), options=firefox_options)
try:
    driver = webdriver.Firefox(service=Service(GeckoDriverManager().install()), options=firefox_options)
    driver.get('https://www.example.com')
    print(driver.title)
except WebDriverException as e:
    print(f"Error: {e}")

WebDriverException: Message: Process unexpectedly closed with status 1


In [None]:

!apt-get update
!apt install firefox
!pip install -U selenium
!wget https://github.com/mozilla/geckodriver/releases/latest/download/geckodriver-linux64.tar.gz
!tar -xvzf geckodriver-linux64.tar.gz
!chmod +x geckodriver

Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Ign:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:6 https://r2u.stat.illinois.edu/ubuntu jammy Release
Get:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:10 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:11 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:12 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:13 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [2,325 kB]
Get:14 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,160 kB]
Fetched 3,743 kB in 2s (1,688 kB/s

In [None]:
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os

# تنظیمات مرورگر فایرفاکس برای حالت بدون سر
firefox_options = Options()
firefox_options.add_argument("--headless")  # حالت بدون سر
firefox_options.add_argument("--no-sandbox")
firefox_options.add_argument("--disable-dev-shm-usage")

# مسیر geckodriver
geckodriver_path = os.getcwd() + "/geckodriver"  # استفاده از مسیر فعلی

# راه‌اندازی WebDriver فایرفاکس
driver = webdriver.Firefox(service=Service(geckodriver_path), options=firefox_options)

# باز کردن وب‌سایت
driver.get('https://example.com')  # آدرس وب‌سایت را تغییر دهید

try:
    # صبر کردن برای نمایش دکمه اکسپت کوکی
    accept_cookies_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//button[text()="Accept"]'))  # تغییر XPath بر اساس دکمه اکسپت در وب‌سایت
    )
    accept_cookies_button.click()  # کلیک روی دکمه اکسپت کوکی‌ها
except Exception as e:
    print("کوکی‌ها پذیرفته نشدند یا دکمه موجود نیست:", e)

# ادامه کار با وب‌سایت بعد از اکسپت شدن کوکی‌ها
# ...

# بستن مرورگر
driver.quit()


NoSuchDriverException: Message: Unable to obtain driver for firefox; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors/driver_location


In [None]:
# نصب Firefox و GeckoDriver
!apt-get update
!apt install -y firefox
!pip install -U selenium
!wget https://github.com/mozilla/geckodriver/releases/latest/download/geckodriver-linux64.tar.gz
!tar -xvzf geckodriver-linux64.tar.gz
!chmod +x geckodriver

# بررسی مسیر فعلی
import os
os.getcwd()  # نمایش مسیر فعلی


0% [Working]            Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
0% [Waiting for headers] [Waiting for headers] [Connected to r2u.stat.illinois.edu (192.17.190.167)]                                                                                                    Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
0% [Waiting for headers] [Waiting for headers] [Waiting for headers] [Connecting to ppa.launchpadcon                                                                                                    Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
                                                                                                    Hit:4 http://security.ubuntu.com/ubuntu jammy-security InRelease
0% [Waiting for headers] [Waiting for headers] [Connected to ppa.launchpadcontent.net (185.125.190.8                                                                            

'/content'

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.firefox import GeckoDriverManager

# Setup Selenium WebDriver
driver = webdriver.Firefox()

# Function to automate browser and scrape code
def selenium_scrape(contest_id, submission_id):
    url = f"https://codeforces.com/contest/{contest_id}/submission/{submission_id}"

    # Open the webpage
    driver.get(url)

    # Find the element that contains the source code
    code_element = driver.find_element('xpath', '//pre[@class="prettyprint"]')

    if code_element:
        source_code = code_element.text
        print("Source code extracted:")
        print(source_code)
    else:
        print("Couldn't find the source code on the page.")

    # Close the browser
    driver.quit()

# Example usage:
selenium_scrape(566, 12345678)


WebDriverException: Message: Process unexpectedly closed with status 1
