### Step 2: Data collection and preprocessing

In [None]:
import requests
import time
from urllib.parse import urlparse

# --- Authentication ---------------------------------------------------------
# Personal access token sent with every GitHub API request made below.
# Replace the placeholder with a real token before running the crawl.
GITHUB_TOKEN = "your_token"

# --- Target repository ------------------------------------------------------
# Web URL of the repository whose Python files will be listed.
repo_url = "https://github.com/king04aman/All-In-One-Python-Projects"

# The URL path has the form "/<owner>/<name>", so splitting on "/" yields an
# empty first element followed by the owner and the repository name.
parsed_url = urlparse(repo_url)
path_parts = parsed_url.path.split('/')
repo_owner = path_parts[1]  # e.g. 'king04aman'
repo_name = path_parts[2]   # e.g. 'All-In-One-Python-Projects'

# --- API endpoint -----------------------------------------------------------
# Contents endpoint for the repository root; the crawl starts from here.
api_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}/contents/"

# Function to fetch Python files recursively with authentication and error handling
def fetch_python_files(api_url, python_files=None, max_retries=5):
    """
    Recursively collect the download URLs of all Python files (.py) reachable
    from a GitHub contents API URL.

    Args:
        api_url (str): The GitHub contents API URL to list.
        python_files (list | None): List that found download URLs are appended
            to. When omitted, a fresh list is created for this call, so
            separate top-level calls never share (or duplicate) results.
        max_retries (int): Maximum number of 60-second waits after an HTTP 403
            response before giving up on this URL.

    Returns:
        list: The list containing the download URLs of all found Python files.
        URLs that could not be listed are reported on stdout and skipped.

    Raises:
        requests.exceptions.RequestException: If a request fails at the
            network level or does not answer within the timeout.
    """
    # A mutable default ([]) would be shared by every call to this function.
    if python_files is None:
        python_files = []

    headers = {
        "Authorization": f"token {GITHUB_TOKEN}",  # Authenticates with the module-level token.
        "Accept": "application/vnd.github.v3+json"  # Requests the v3 JSON representation.
    }

    # Retry in a bounded loop instead of recursing: a 403 that never clears
    # (e.g. an invalid token) would otherwise loop forever and grow the stack.
    attempts = max(1, max_retries + 1)
    for attempt in range(attempts):
        # The timeout keeps a stalled connection from hanging the crawl.
        response = requests.get(api_url, headers=headers, timeout=30)
        if response.status_code != 403 or attempt == attempts - 1:
            break
        print("Rate limit exceeded. Waiting 60 seconds before retrying...")
        time.sleep(60)

    if response.status_code != 200:
        print(f"Failed to fetch contents from {api_url} (Status Code: {response.status_code})")
        return python_files

    items = response.json()
    # A contents URL that points at a single file yields one object rather
    # than a list; normalise so the loop below handles both shapes.
    if isinstance(items, dict):
        items = [items]

    for item in items:
        if item["type"] == "file" and item["name"].endswith(".py"):
            python_files.append(item["download_url"])
        elif item["type"] == "dir":
            time.sleep(1)  # Small delay between directory listings to reduce rate-limit pressure.
            fetch_python_files(item["url"], python_files, max_retries)

    return python_files

# Crawl the repository starting from its root contents endpoint.
python_files = fetch_python_files(api_url)

# Report a short preview (at most five URLs) of what the crawl found.
if not python_files:
    print("No Python files found.")
else:
    print("Python files found:")
    print(*python_files[:5], sep="\n")

Python files found:
https://raw.githubusercontent.com/king04aman/All-In-One-Python-Projects/main/Age%20Calculator/calculate.py
https://raw.githubusercontent.com/king04aman/All-In-One-Python-Projects/main/Alarm%20Bot/main.py
https://raw.githubusercontent.com/king04aman/All-In-One-Python-Projects/main/Ascii%20Art%20Generator/generate.py
https://raw.githubusercontent.com/king04aman/All-In-One-Python-Projects/main/Audio%20Book%20Generator/main.py
https://raw.githubusercontent.com/king04aman/All-In-One-Python-Projects/main/CountDown%20Timer/main.py


In [11]:
"""
This script calculates the cyclomatic complexity of a Python code file retrieved from a given URL.

It uses the following libraries:

- requests: Used to fetch the Python code from the provided URL.
- radon.complexity: Used to calculate the cyclomatic complexity of the code.

The script works as follows:

1. Defines a function `get_code_complexity(code)`:
   - Takes Python code as input.
   - Uses `radon.complexity.cc_visit` to analyze the code and get complexity results.
   - If complexity results are found, it calculates and returns the average cyclomatic complexity.
   - If no results are found, it returns 0.

2. Retrieves the first Python file URL from a list named `python_files`. (It assumes `python_files` is already defined and populated by the previous script.)

3. Fetches the code from the retrieved URL using `requests.get()`.

4. If the code is successfully fetched:
   - Calls `get_code_complexity()` to calculate the complexity score.
   - Prints the file URL and its calculated complexity score, formatted to two decimal places.

5. If fetching the code fails, it prints an error message.

6. Includes a try-except block to catch and print any exceptions that might occur during the process.
"""

import requests
from radon.complexity import cc_visit

def get_code_complexity(code):
    """
    Return the average cyclomatic complexity of a piece of Python source.

    Args:
        code (str): Python source code passed to `radon.complexity.cc_visit`.

    Returns:
        The mean of the `complexity` attribute over the results returned by
        `cc_visit`, or 0 when `cc_visit` returns no results.
    """
    blocks = cc_visit(code)
    if not blocks:
        # Nothing to average; avoid dividing by zero.
        return 0

    total_complexity = sum(block.complexity for block in blocks)
    return total_complexity / len(blocks)

# Analyze the first Python file found by the repository crawl, if there is one.
# `python_files` is the list of download URLs produced by fetch_python_files.
file_url = python_files[0] if python_files else None

# Reset the result up front so a failed run never leaves the name undefined
# (or holding a stale value from an earlier run of this cell).
complexity_score = None

if file_url is None:
    # Indexing an empty list would raise IndexError before any error handling.
    print("No Python files available to analyze.")
else:
    try:
        # The timeout keeps a stalled download from hanging the cell.
        code_response = requests.get(file_url, timeout=30)
        if code_response.status_code == 200:
            code_sample = code_response.text
            complexity_score = get_code_complexity(code_sample)
            print(f"File: {file_url}")
            print(f"Code Complexity Score: {complexity_score:.2f}")
        else:
            print(f"Failed to fetch the code file: {file_url}")
    except Exception as e:
        # Top-level boundary of the cell: report network or parsing errors
        # instead of aborting the run.
        print(f"Error: {e}")


File: https://raw.githubusercontent.com/king04aman/All-In-One-Python-Projects/main/Age%20Calculator/calculate.py
Code Complexity Score: 4.50


In [12]:
"""
This script performs static code analysis using Pylint to evaluate the quality of Python code.

It utilizes the following libraries and modules:

- subprocess: Used to execute the Pylint command-line tool.
- tempfile: Used to create a temporary file to store the code for Pylint analysis.

The script works as follows:

1. Defines a function `get_pylint_score(code)`:
   - Takes Python code as input.
   - Creates a temporary Python file using `tempfile.NamedTemporaryFile`.
   - Writes the input code to the temporary file.
   - Executes Pylint on the temporary file using `subprocess.run`.
   - Captures the output of Pylint.
   - Parses the output to extract the Pylint score (e.g., "8.50/10").
   - Returns the extracted score as a float.
   - If no score is found, it returns 0.

2. Defines an example `code_sample` string, which can be replaced with actual code.

3. Calls `get_pylint_score()` with the `code_sample` to get the Pylint score.

4. Prints the calculated Pylint score.

Note: This script requires Pylint to be installed and accessible from the command line.
"""

import subprocess
import tempfile

def get_pylint_score(code):
    """
    Run Pylint on a piece of Python source and return its overall score.

    The code is written to a temporary ``.py`` file, the ``pylint`` command is
    run on that file, and the score is read from the "rated at X/10" line of
    its standard output. The temporary file is removed afterwards.

    Args:
        code (str): Python source code to analyze.

    Returns:
        float: The score reported by Pylint, or 0 if no score line was found
        in the output.

    Raises:
        FileNotFoundError: If the ``pylint`` executable cannot be found.
    """
    import os
    import re

    # delete=False because the file must be closed before another process
    # (pylint) opens it; it is removed explicitly in the finally block.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".py")
    try:
        with temp_file:
            temp_file.write(code.encode())

        result = subprocess.run(["pylint", temp_file.name], capture_output=True, text=True)
    finally:
        # Previously the temporary file was left behind on every call.
        os.remove(temp_file.name)

    # Match the number directly instead of relying on its position in the
    # whitespace-split line, which breaks if the wording changes.
    match = re.search(r"rated at (-?\d+(?:\.\d+)?)/", result.stdout)
    if match:
        return float(match.group(1))
    return 0  # Default to 0 if no score found

# Example usage: score a one-line snippet with get_pylint_score and print the result.
# Note that this rebinds `code_sample`, which the emissions-tracking cell below executes.
code_sample = "print('Hello, world!')"  # Placeholder snippet; replace with the code to analyze
pylint_score = get_pylint_score(code_sample)  # Kept in a variable; the results DataFrame reads it later
print(f"Pylint Score: {pylint_score}")


Pylint Score: 0.0


In [13]:
"""
This script tracks the carbon emissions of a given Python code snippet using the codecarbon library.

It utilizes the following library:

- codecarbon: Used to measure and report the carbon emissions associated with running the code.

The script works as follows:

1. Imports the EmissionsTracker class from the codecarbon library.

2. Initializes an EmissionsTracker object.

3. Starts the tracker using `tracker.start()`.

4. Executes the provided `code_sample` string, excluding any lines that start with "import", using the `exec()` function. This enables the code to run dynamically.

5. Stops the tracker using `tracker.stop()`.

6. Prints the final calculated CO2 emissions in kilograms.

Note: This script requires the codecarbon library to be installed. The `code_sample` variable must be defined before running the script.
"""
from codecarbon import EmissionsTracker

# Measure the emissions attributed to running `code_sample`.
tracker = EmissionsTracker()

# Drop lines that start with "import" before executing the sample.
runnable_code = "\n".join(
    line for line in code_sample.split("\n") if not line.startswith("import")
)

tracker.start()
try:
    # NOTE(review): exec runs arbitrary code in this process. Only use it on
    # trusted samples; code downloaded from a repository should be sandboxed.
    exec(runnable_code)
finally:
    # Always stop the tracker, even when the sample raises, so it is not
    # left running after a failed cell.
    tracker.stop()

print(f"CO2 Emissions: {tracker.final_emissions} kg")


[codecarbon INFO @ 10:47:48] [setup] RAM Tracking...
[codecarbon INFO @ 10:47:48] [setup] CPU Tracking...
 Windows OS detected: Please install Intel Power Gadget to measure CPU

[codecarbon INFO @ 10:47:50] CPU Model on constant consumption mode: 12th Gen Intel(R) Core(TM) i5-12450H
[codecarbon INFO @ 10:47:50] [setup] GPU Tracking...
[codecarbon INFO @ 10:47:50] No GPU found.
[codecarbon INFO @ 10:47:50] >>> Tracker's metadata:
[codecarbon INFO @ 10:47:50]   Platform system: Windows-11-10.0.26100-SP0
[codecarbon INFO @ 10:47:50]   Python version: 3.12.1
[codecarbon INFO @ 10:47:50]   CodeCarbon version: 2.8.3
[codecarbon INFO @ 10:47:50]   Available RAM : 15.730 GB
[codecarbon INFO @ 10:47:50]   CPU count: 12
[codecarbon INFO @ 10:47:50]   CPU model: 12th Gen Intel(R) Core(TM) i5-12450H
[codecarbon INFO @ 10:47:50]   GPU count: None
[codecarbon INFO @ 10:47:50]   GPU model: None
[codecarbon INFO @ 10:47:54] Saving emissions data to file c:\Users\rahul\OneDrive\Desktop\Trithon\ai-model

Hello, world!
CO2 Emissions: 1.0060326812450504e-07 kg


In [15]:
"""
This script creates a Pandas DataFrame to store and display the analysis results of a Python code file.

It utilizes the following library:

- pandas: Used for data manipulation and analysis, specifically for creating and displaying DataFrames.

The script works as follows:

1. Imports the pandas library.

2. Creates a Pandas DataFrame named `df` with the following columns:
   - "code_complexity": Stores the cyclomatic complexity score (obtained from the previous code).
   - "pylint_score": Stores the Pylint score (obtained from the previous code).
   - "energy_consumption": Stores the CO2 emissions in kilograms (obtained from the previous code using codecarbon).
   - "repo": Stores the name of the GitHub repository being analyzed (obtained from the initial URL parsing).

3. The DataFrame is populated with the values of `complexity_score`, `pylint_score`, `tracker.final_emissions`, and `repo_name`, which are assumed to be defined from the preceding code.

4. The created DataFrame `df` is displayed.

Note: This script assumes that the variables `complexity_score`, `pylint_score`, `tracker.final_emissions`, and `repo_name` are already defined and hold the respective values calculated in the previous code segments.
"""
import pandas as pd

# One-row table gathering the metrics computed in the earlier cells.
# Each value is wrapped in a single-element list so it becomes one row.
analysis_columns = {
    "code_complexity": [complexity_score],
    "pylint_score": [pylint_score],
    "energy_consumption": [tracker.final_emissions],
    "repo": [repo_name],
}
df = pd.DataFrame(analysis_columns)

# Bare expression: shown as the cell's output in a notebook.
df


Unnamed: 0,code_complexity,pylint_score,energy_consumption,repo
0,4.5,0.0,1.006033e-07,All-In-One-Python-Projects


In [19]:
# Load "emissions.csv" (resolved relative to the current working directory) into a DataFrame.
# NOTE(review): presumably this is the run log written by codecarbon's EmissionsTracker — confirm the path.
dd = pd.read_csv("emissions.csv")
dd  # Bare expression: shown as the cell's output in a notebook

Unnamed: 0,timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,...,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
0,2025-03-05T08:32:58,codecarbon,2e040b63-e8d8-483c-9cd0-e4ca0ecbe8f8,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.019167,4.586363e-10,2.392781e-08,42.5,0.0,5.898877,...,12,12th Gen Intel(R) Core(TM) i5-12450H,,,-71.2,46.8,15.730339,machine,N,1.0
1,2025-03-05T08:38:31,codecarbon,266ed459-13a6-4cb9-835c-fd6135b63d30,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.012576,9.347881e-08,7.432816e-06,42.5,0.0,5.898877,...,12,12th Gen Intel(R) Core(TM) i5-12450H,,,83.9764,21.4668,15.730339,machine,N,1.0
2,2025-03-05T08:40:07,codecarbon,db66d580-7738-4487-ae2b-aa28bb1cac17,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.014314,1.108548e-07,7.744559e-06,42.5,0.0,5.898877,...,12,12th Gen Intel(R) Core(TM) i5-12450H,,,83.9764,21.4668,15.730339,machine,N,1.0
3,2025-03-05T09:06:06,codecarbon,87bece18-72fb-4613-91ca-48855583974f,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.015469,1.240231e-07,8.017736e-06,42.5,0.0,5.898877,...,12,12th Gen Intel(R) Core(TM) i5-12450H,,,83.9764,21.4668,15.730339,machine,N,1.0
4,2025-03-05T09:07:37,codecarbon,0aba071a-475b-439d-9229-f3ba1fc63216,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.014573,1.07967e-07,7.408905e-06,42.5,0.0,5.898877,...,12,12th Gen Intel(R) Core(TM) i5-12450H,,,83.9764,21.4668,15.730339,machine,N,1.0
5,2025-03-05T09:08:49,codecarbon,7080cc67-94ed-46f6-b1b4-8e5b2895e14b,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.020792,5.26487e-10,2.532125e-08,42.5,0.0,5.898877,...,12,12th Gen Intel(R) Core(TM) i5-12450H,,,-71.2,46.8,15.730339,machine,N,1.0
6,2025-03-05T10:25:22,codecarbon,02fefa23-c6ae-4139-ac4b-2be2ca73d972,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.016372,3.903456e-10,2.38427e-08,42.5,0.0,5.898877,...,12,12th Gen Intel(R) Core(TM) i5-12450H,,,-71.2,46.8,15.730339,machine,N,1.0
7,2025-03-05T10:47:54,codecarbon,e8d1ada2-8f41-40e6-90b0-4a63748d11c9,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,0.012974,1.006033e-07,7.75452e-06,42.5,0.0,5.898877,...,12,12th Gen Intel(R) Core(TM) i5-12450H,,,83.9764,21.4668,15.730339,machine,N,1.0
