Build a Hackathon Management platform for end-to-end event handling
Deliverables:
1. Allow participants to register with details
2. Provide a list of hackathon themes or categories.
3. Teams can upload their projects (e.g., code repositories(github links),
documents, and presentations), Store submissions securely with timestamp.
4.Build AI/ML techniques to help judges with insights from the submissions like:
a. Feedback Suggestions:Auto-generate constructive feedback for
submissions to save judges time.
b. Scoring Assistance: LLMs can pre-score submissions by:
i. Summarizing key project features.
ii. Analyzing problem-statement adherence.
iii. Suggesting scores for innovation, feasibility, and impact based on
predefined prompts.

In [17]:
!pip install transformers



In [18]:
import sqlite3
import datetime
import re
from tabulate import tabulate
from datetime import datetime
from typing import Dict, List, Optional
import requests
import argparse
import base64
import json
from github import Github
from transformers import pipeline
import os

In [23]:
import datetime

def is_valid_email(email):
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return re.match(pattern, email) is not None

def is_valid_github_link(link):
    pattern = r'^https?://(?:www\.)?github\.com/[\w-]+/[\w.-]+/?$'
    return re.match(pattern, link) is not None

def is_valid_presentation_link(link):
    pattern = r'^https?://(?:docs\.google\.com/presentation/|www\.slideshare\.net/|slides\.com/).+'
    return re.match(pattern, link) is not None

def is_valid_drive_link(link):
    pattern = r'^https?://(?:drive\.google\.com/|docs\.google\.com/)\S+'
    return re.match(pattern, link) is not None

def is_valid_linkedin_link(link):
    if not link:  # Optional field
        return True
    pattern = r'^https?://(?:www\.)?linkedin\.com/in/[\w-]+/?$'
    return re.match(pattern, link) is not None

def get_validated_input(prompt, validator_func, error_message):
    while True:
        value = input(prompt)
        if validator_func(value):
            return value
        print(error_message)

def create_database():
    conn = sqlite3.connect('hackathon.db')
    cursor = conn.cursor()

    # Create participants table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS participants (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT NOT NULL,
            email TEXT NOT NULL UNIQUE,
            team_name TEXT NOT NULL,
            registration_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    ''')

    # Create themes table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS themes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            email TEXT NOT NULL,
            theme TEXT NOT NULL,
            problem_statement TEXT NOT NULL,
            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (email) REFERENCES participants(email)
        )
    ''')

    # Create submissions table
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS submissions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            email TEXT NOT NULL,
            theme TEXT NOT NULL,
            git_link TEXT,
            ppt_link TEXT,
            drive_link TEXT,
            linkedin_link TEXT,
            problem_statement TEXT NOT NULL,
            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (email) REFERENCES participants(email)
        )
    ''')

    cursor.execute('''
        CREATE TABLE IF NOT EXISTS problem_statements (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          theme TEXT NOT NULL,
          problem_statement TEXT NOT NULL,
          details TEXT NOT NULL,
          FOREIGN KEY (theme) REFERENCES themes(theme)
        )
    ''')

    conn.commit()
    conn.close()
    print("Database initialized successfully!")



def drop_all_tables():
    conn = sqlite3.connect('hackathon.db')
    cursor = conn.cursor()

    try:
        # List of tables to drop
        tables = ['participants', 'themes', 'submissions', 'problem_statements']
        #tables = ['problem_statements']

        for table in tables:
            cursor.execute(f"DROP TABLE IF EXISTS {table}")
        conn.commit()
        print("All tables dropped successfully!")
    except sqlite3.Error as e:
        print(f"An error occurred: {e}")
    finally:
        conn.close()

drop_all_tables()
def add_problem_statements():
    conn = sqlite3.connect('hackathon.db')
    cursor = conn.cursor()

    problem_statements = {
        "AI/ML": [
            {
                "title": "Build a chatbot for mental health",
                "details": "Develop a conversational AI chatbot to provide mental health support, including identifying user moods, offering calming exercises, and directing users to professional resources. Requirements: Implement natural language understanding, sentiment analysis, and integration with a database of mental health tips."
            },
            {
                "title": "Implement a GPT-based Content Summarizer",
                "details": "Create a content summarization system powered by GPT models. The system should take input in the form of long text articles or documents and generate concise summaries that capture the key points and important information"
            }
        ],
        "Web Development": [
            {
                "title": "Develop a responsive e-commerce site",
                "details": "Build a fully functional e-commerce website with features like user authentication, product catalog, cart functionality, and a payment gateway. Requirements: Ensure the site is responsive, follows modern UI/UX principles, and implements secure user data handling."
            },
            {
                "title": "Create a personal portfolio website",
                "details": "Design and develop a personal portfolio site to showcase your projects, skills, and resume. Requirements: Include an interactive gallery for projects, a contact form, and a section for your blog or updates. The site should be mobile-friendly and visually appealing."
            }
        ],
        "Mobile Development": [
            {
                "title": "Design a fitness tracking app",
                "details": "Develop a mobile app to track fitness activities like steps, calories burned, and workout progress. The app should include goal setting and reminders. Requirements: Integrate with mobile device sensors or APIs, provide a clean UI, and include basic data analytics for user insights."
            },
            {
                "title": "Create a real-time chat application",
                "details": "Build a mobile app that enables real-time messaging between users, with features like user authentication, message history, and notifications. Requirements: Use Firebase or similar services for backend support and ensure secure communication."
            }
        ],
        "Cybersecurity": [
            {
                "title": "Implement a phishing detection tool",
                "details": "Develop a tool to detect phishing emails or websites using machine learning or rule-based approaches. The tool should alert users when potential phishing content is detected. Requirements: Use publicly available phishing datasets and provide a user-friendly interface for testing."
            },
            {
                "title": "Develop a secure file storage system",
                "details": "Create a secure system for storing files, ensuring encryption during storage and transmission. Include features for role-based access control. Requirements: Implement AES encryption, a database for metadata, and a simple web or desktop client for interaction."
            }
        ],
        "Data Science": [
            {
                "title": "Analyze customer churn data",
                "details": "Perform data analysis on customer churn datasets to identify factors influencing customer retention. Develop a predictive model and provide actionable insights. Requirements: Use Python with libraries like Pandas, NumPy, and scikit-learn, and create visualizations using Matplotlib or Seaborn."
            },
            {
                "title": "Create a real-time dashboard for sales",
                "details": "Develop a dashboard to visualize sales data in real-time, including metrics like total sales, top-selling products, and regional performance. Requirements: Use Python, Flask/Django for backend, and Dash or Plotly for visualization. Connect the dashboard to a live data source or simulate real-time updates."
            }
        ]
    }

    try:
        for theme, statements in problem_statements.items():
            for ps in statements:
                cursor.execute('''
                    INSERT INTO problem_statements (theme, problem_statement,details)
                    VALUES (?, ?, ?)
                ''', (theme, ps['title'], ps['details']))
        conn.commit()
        print("\nExpanded problem statements added successfully!")

    except sqlite3.Error as e:
        print(f"An error occurred: {e}")
    finally:
        conn.close()

def display_themes():
    themes = ["AI/ML", "Web Development", "Mobile Development", "Cybersecurity", "Data Science"]
    print("\nAvailable Hackathon Themes:")
    for i, theme in enumerate(themes):
        print(f"{i + 1}. {theme}")
    return themes

def register_participant():
    conn = sqlite3.connect('hackathon.db')
    cursor = conn.cursor()

    print("\nParticipant Registration")
    name = input("Enter your name: ")
    email = get_validated_input(
        "Enter your email: ",
        is_valid_email,
        "Invalid email format! Example: user@example.com"
    )
    team_name = input("Enter your team name: ")

    try:
        cursor.execute('''
            INSERT INTO participants (name, email, team_name)
            VALUES (?, ?, ?)
        ''', (name, email, team_name))
        conn.commit()
        print(f"Participant {name} registered successfully!")
    except sqlite3.Error as e:
        print(f"An error occurred: {e}")
    finally:
        conn.close()

def choose_theme():
    conn = sqlite3.connect('hackathon.db')
    cursor = conn.cursor()

    print("\nChoose Hackathon Theme")
    email = get_validated_input(
        "Enter your registered email: ",
        is_valid_email,
        "Invalid email format! Example: user@example.com"
    )

    try:
        cursor.execute("SELECT email FROM participants WHERE email = ?", (email,))
        participant = cursor.fetchone()

        if not participant:
            print("\nEmail not found. Please register first.")
            return

        themes = display_themes()
        theme_choice = int(input("\nChoose a theme by number: "))

        if theme_choice < 1 or theme_choice > len(themes):
            print("\nInvalid theme choice.")
            return

        selected_theme = themes[theme_choice - 1]
        # Retrieve problem statements for the selected theme
        cursor.execute("SELECT id, problem_statement, details FROM problem_statements WHERE LOWER(theme) = LOWER(?)", (selected_theme.strip(),))
        problem_statements = cursor.fetchall()

        if not problem_statements:
          print("\nNo problem statements available for this theme.")
          return

        print(f"\nProblem Statements for {selected_theme}:")
        for i, (ps_id, problem_statement, details) in enumerate(problem_statements):
          print(f"\n{i + 1}. {problem_statement}\n   Details: {details}")

        ps_choice = int(input("\nChoose a problem statement by number: "))

        if ps_choice < 1 or ps_choice > len(problem_statements):
          print("\nInvalid problem statement choice.")
          return

        selected_problem_statement = problem_statements[ps_choice - 1][1]  # Extract the problem statement text
        selected_details = problem_statements[ps_choice - 1][2]  # Extract the details
        timestamp = datetime.datetime.now()

        # Insert the chosen theme and problem statement into the themes table
        cursor.execute('''
            INSERT INTO themes (email, theme, problem_statement, timestamp)
            VALUES (?, ?, ?, ?)
        ''', (email, selected_theme, selected_problem_statement, timestamp))
        conn.commit()

        print(f"\nYou have chosen the theme '{selected_theme}' and problem statement:")
        print(f"{selected_problem_statement}")
        print("\nTheme and problem statement choice stored successfully!")

    except sqlite3.Error as e:
        print(f"An error occurred: {e}")
    finally:
        conn.close()

def submit_project():
    conn = sqlite3.connect('hackathon.db')
    cursor = conn.cursor()

    print("\nSubmit Project")
    email = get_validated_input(
        "Enter your registered email: ",
        is_valid_email,
        "Invalid email format! Example: user@example.com"
    )

    try:
        cursor.execute("SELECT theme, problem_statement FROM themes WHERE email = ?", (email,))
        submissions = cursor.fetchall()

        if not submissions:
            print("\nNo themes or problem statements chosen yet. Please choose first.")
            return

        print("\nThemes and Problem Statements you have chosen:")
        for i, (theme, ps) in enumerate(submissions):
            print(f"{i + 1}. Theme: {theme}, Problem Statement: {ps}")

        choice = int(input("\nChoose a submission by number to add project details: "))

        if choice < 1 or choice > len(submissions):
            print("\nInvalid choice.")
            return

        chosen_theme, chosen_ps = submissions[choice - 1]

        git_link = get_validated_input(
            "Enter your GitHub repository link: ",
            is_valid_github_link,
            "Invalid GitHub link! Example: https://github.com/username/repository"
        )

        ppt_link = get_validated_input(
            "Enter your presentation link: ",
            is_valid_presentation_link,
            "Invalid presentation link! Example: https://docs.google.com/presentation/d/..."
        )

        drive_link = get_validated_input(
            "Enter your Google Drive link: ",
            is_valid_drive_link,
            "Invalid Drive link! Example: https://drive.google.com/..."
        )

        linkedin_link = get_validated_input(
            "Enter your LinkedIn profile link (optional, press Enter to skip): ",
            is_valid_linkedin_link,
            "Invalid LinkedIn link! Example: https://linkedin.com/in/username"
        )

        timestamp = datetime.datetime.now()

        cursor.execute('''
          SELECT COUNT(*)
          FROM submissions
          WHERE email = ? AND theme = ? AND problem_statement = ?
          ''', (email, chosen_theme, chosen_ps))

        exists = cursor.fetchone()[0]  # Fetch count from result

        if exists > 0:
            # Update existing record
            cursor.execute('''
                UPDATE submissions
                SET git_link = ?, ppt_link = ?, drive_link = ?, linkedin_link = ?, timestamp = ?
                WHERE email = ? AND theme = ? AND problem_statement = ?
            ''', (git_link, ppt_link, drive_link, linkedin_link, timestamp, email, chosen_theme, chosen_ps))
            print("Submission updated successfully.")
        else:
            # Insert new record
            cursor.execute('''
                INSERT INTO submissions (email, theme, problem_statement, git_link, ppt_link, drive_link, linkedin_link, timestamp)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            ''', (email, chosen_theme, chosen_ps, git_link, ppt_link, drive_link, linkedin_link, timestamp))
            print("New submission added successfully.")

        # cursor.execute("SELECT git_link, ppt_link, drive_link, linkedin_link, timestamp, email, theme, problem_statement FROM submissions")
        # submissions = cursor.fetchall()
        # print(submissions)

        conn.commit()
        print("\nProject details submitted successfully!")

    except sqlite3.Error as e:
        print(f"An error occurred: {e}")
    finally:
        conn.close()

def main():
    create_database()
    flag=0
    if(flag==0):
      add_problem_statements()
      flag+=1
    while True:
        print("\nHackathon Management System")
        print("1. Register")
        print("2. Choose Theme")
        print("3. Submit Project")
        print("4. Exit")

        choice = input("Enter your choice: ")

        if choice == "1":
            register_participant()
        elif choice == "2":
            choose_theme()
        elif choice == "3":
            submit_project()
        elif choice == "4":
            print("\nThank you for using the Hackathon Management System. Goodbye!")
            break
        else:
            print("\nInvalid choice. Please try again.")

if __name__ == "__main__":
    main()

All tables dropped successfully!
Database initialized successfully!

Expanded problem statements added successfully!

Hackathon Management System
1. Register
2. Choose Theme
3. Submit Project
4. Exit
Enter your choice: 1

Participant Registration
Enter your name: Kanishka
Enter your email: s@a.com
Enter your team name: s
Participant Kanishka registered successfully!

Hackathon Management System
1. Register
2. Choose Theme
3. Submit Project
4. Exit
Enter your choice: 2

Choose Hackathon Theme
Enter your registered email: s@a.com

Available Hackathon Themes:
1. AI/ML
2. Web Development
3. Mobile Development
4. Cybersecurity
5. Data Science

Choose a theme by number: 1

Problem Statements for AI/ML:

1. Build a chatbot for mental health
   Details: Develop a conversational AI chatbot to provide mental health support, including identifying user moods, offering calming exercises, and directing users to professional resources. Requirements: Implement natural language understanding, sentiment

In [24]:
class HackathonDBViewer:
    def __init__(self, db_path: str = 'hackathon.db'):
        """Initialize connection to hackathon database"""
        try:
            self.conn = sqlite3.connect(db_path)
            self.cursor = self.conn.cursor()
            print(f"Connected to hackathon database at {db_path}")
        except sqlite3.Error as e:
            print(f"Error connecting to database: {e}")
            raise

    def view_participants(self):
        """Display all registered participants"""
        try:
            self.cursor.execute("""
                SELECT
                    p.name,
                    p.email,
                    p.team_name,
                    p.registration_timestamp,
                    COUNT(DISTINCT t.theme) as themes_chosen,
                    COUNT(DISTINCT s.theme) as submissions_made
                FROM participants p
                LEFT JOIN themes t ON p.email = t.email
                LEFT JOIN submissions s ON p.email = s.email
                GROUP BY p.email
                ORDER BY p.registration_timestamp DESC
            """)

            rows = self.cursor.fetchall()
            headers = ['Name', 'Email', 'Team', 'Registration Time', 'Themes Chosen', 'Submissions']

            print("\n=== REGISTERED PARTICIPANTS ===")
            print(tabulate(rows, headers=headers, tablefmt='grid'))
            print(f"\nTotal Participants: {len(rows)}")

        except sqlite3.Error as e:
            print(f"Error viewing participants: {e}")

    def view_themes(self):
        """Display theme choices by participants"""
        try:
            self.cursor.execute("""
                SELECT
                    p.name,
                    t.theme,
                    t.problem_statement,
                    t.timestamp,
                    CASE
                        WHEN s.theme IS NOT NULL THEN 'Yes'
                        ELSE 'No'
                    END AS has_submitted
                FROM participants p
                LEFT JOIN themes t ON p.email = t.email
                LEFT JOIN submissions s ON p.email = s.email AND t.theme = s.theme
                ORDER BY t.timestamp DESC
            """)

            rows = self.cursor.fetchall()
            headers = ['Name', 'Theme', 'Problem Statement', 'Theme Choice Time', 'Has Submitted']

            print("\n=== THEME CHOICES ===")
            print(tabulate(rows, headers=headers, tablefmt='grid'))
            print(f"\nTotal Themes Chosen: {len(rows)}")

        except sqlite3.Error as e:
            print(f"Error viewing themes: {e}")

    def view_submissions(self):
        """Display submissions made by participants"""
        try:
            self.cursor.execute("""
                SELECT
                    p.name,
                    s.theme,
                    s.problem_statement,
                    s.git_link,
                    s.ppt_link,
                    s.drive_link,
                    s.linkedin_link,
                    s.timestamp
                FROM submissions s
                JOIN participants p ON p.email = s.email
                ORDER BY s.timestamp DESC
            """)

            rows = self.cursor.fetchall()
            headers = ['Name', 'Theme', 'Problem Statement', 'GitHub Link', 'PPT Link', 'Drive Link', 'LinkedIn Link', 'Submission Time']

            print("\n=== SUBMISSIONS ===")
            print(tabulate(rows, headers=headers, tablefmt='grid'))
            print(f"\nTotal Submissions: {len(rows)}")

        except sqlite3.Error as e:
            print(f"Error viewing submissions: {e}")

    def close(self):
        """Close database connection"""
        self.conn.close()
        print("Database connection closed.")

# Example usage:
if __name__ == "__main__":
    viewer = HackathonDBViewer()

    while True:
        print("\nHackathon DB Viewer")
        print("1. View Participants")
        print("2. View Themes")
        print("3. View Submissions")
        print("4. Exit")

        choice = input("Enter your choice: ")

        if choice == "1":
            viewer.view_participants()
        elif choice == "2":
            viewer.view_themes()
        elif choice == "3":
            viewer.view_submissions()
        elif choice == "4":
            viewer.close()
            break
        else:
            print("Invalid choice. Please try again.")


Connected to hackathon database at hackathon.db

Hackathon DB Viewer
1. View Participants
2. View Themes
3. View Submissions
4. Exit
Enter your choice: 1

=== REGISTERED PARTICIPANTS ===
+----------+---------+--------+---------------------+-----------------+---------------+
| Name     | Email   | Team   | Registration Time   |   Themes Chosen |   Submissions |
| Kanishka | s@a.com | s      | 2024-12-26 06:17:00 |               1 |             1 |
+----------+---------+--------+---------------------+-----------------+---------------+

Total Participants: 1

Hackathon DB Viewer
1. View Participants
2. View Themes
3. View Submissions
4. Exit
Enter your choice: 2

=== THEME CHOICES ===
+----------+---------+------------------------------------------+----------------------------+-----------------+
| Name     | Theme   | Problem Statement                        | Theme Choice Time          | Has Submitted   |
| Kanishka | AI/ML   | Implement a GPT-based Content Summarizer | 2024-12-26 06:17:

In [None]:
!pip install PyGithub

Collecting PyGithub
  Downloading PyGithub-2.5.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pynacl>=1.4.0 (from PyGithub)
  Downloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl.metadata (8.6 kB)
Downloading PyGithub-2.5.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.9/375.9 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (856 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m856.7/856.7 kB[0m [31m40.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynacl, PyGithub
Successfully installed PyGithub-2.5.0 pynacl-1.5.0


In [None]:
#21pd34@psgtech.ac.in
def get_git_link(email):
    conn = sqlite3.connect('hackathon.db')
    cursor = conn.cursor()

    try:
        cursor.execute("SELECT git_link FROM submissions WHERE email = ?", (email,))
        result = cursor.fetchall()
        if result:
            print("\nAvailable Git Links for", email)
            for i, (link,) in enumerate(result):
                print(f"{i+1}. {link}")

            choice = int(input("Choose a link by number: "))
            if 1 <= choice <= len(result):
                selected_link = result[choice-1][0]
                print(f"Selected Link: {selected_link}")
                return selected_link
            else:
                print("Invalid choice")
                return None
        else:
            print(f"No Git links found for email: {email}")
            return None

    except sqlite3.Error as e:
        print(f"An error occurred: {e}")
        return None
    finally:
        conn.close()

# # Example usage
# if __name__ == "__main__":
#   email = input("Enter the registered email: ")
#   git_link =  "https://github.com/KanHarI/gpt-commit-summarizer" #get_git_link(email)

In [None]:
def get_repo_info(repo_url):
    """
    Fetches information about the GitHub repository including:
    - Name
    - Description
    - Stars
    - Forks
    - License
    - Primary language
    - Open issues count
    - Watchers count
    - Creation and update times
    """
    # Extract the username and repository name from the URL
    repo_parts = repo_url.rstrip('/').split('/')[-2:]
    if len(repo_parts) != 2:
        raise ValueError("Invalid GitHub repository URL.")

    user, repo = repo_parts

    # GitHub API URL to get repository info
    api_url = f"https://api.github.com/repos/{user}/{repo}"

    # Make the API request
    response = requests.get(api_url)

    if response.status_code != 200:
        raise ValueError(f"Failed to fetch data for {repo_url}, status code: {response.status_code}")

    repo_info = response.json()

    # Check if the repository data is valid
    if not repo_info:
        raise ValueError(f"Could not fetch details for {repo_url}. The repository might not exist or there might be a problem.")

    # Extract relevant details with safe get() calls
    project_info = {
        "Name": repo_info.get("name", "N/A"),
        "Description": repo_info.get("description", "No description available"),
        "Stars": repo_info.get("stargazers_count", "N/A"),
        "Forks": repo_info.get("forks_count", "N/A"),
        "Language": repo_info.get("language", "N/A"),
        "License": repo_info.get("license", {}).get("name", "N/A"),
        "Open Issues": repo_info.get("open_issues_count", "N/A"),
        "Watchers": repo_info.get("watchers_count", "N/A"),
        "Default Branch": repo_info.get("default_branch", "N/A"),
        "Created At": repo_info.get("created_at", "N/A"),
        "Updated At": repo_info.get("updated_at", "N/A"),
    }

    return project_info

def get_readme_content(user, repo):
    """
    Fetches the README content from the repository (if available).
    Returns the decoded base64 content of the README.
    """
    readme_url = f"https://api.github.com/repos/{user}/{repo}/readme"
    response = requests.get(readme_url)

    if response.status_code == 200:
        readme_data = response.json()
        content = readme_data.get("content", "")
        # Decode base64 encoded content
        decoded_content = base64.b64decode(content).decode('utf-8')
        return decoded_content
    else:
        return "No README found"

# Removed argparse and main function
# Added input to get repo_url from user

# repo_url = input("Enter the GitHub repository URL: ")
email = input("Enter the registered email: ")
repo_url = get_git_link(email)

try:
    # Extract user and repo name from URL
    repo_parts = repo_url.rstrip('/').split('/')[-2:]
    if len(repo_parts) != 2:
        raise ValueError("Invalid GitHub repository URL.")

    user, repo = repo_parts

    # Fetch repository info
    info = get_repo_info(repo_url)
    print("\nRepository Information:")
    for key, value in info.items():
        print(f"{key}: {value}")

    # Fetch and print README content (if available)
    print("\nREADME Content:")
    readme_content = get_readme_content(user, repo)
    # print(readme_content[:1000])  # Print first 1000 characters of the README content
    print(readme_content)

except Exception as e:
    print(f"Error: {e}")

Enter the registered email: k@h.com

Available Git Links for k@h.com
1. https://github.com/links-lang/links-examples
2. https://github.com/KanHarI/gpt-commit-summarizer
Choose a link by number: 2
Selected Link: https://github.com/KanHarI/gpt-commit-summarizer

Repository Information:
Name: gpt-commit-summarizer
Description: None
Stars: 556
Forks: 36
Language: JavaScript
License: MIT License
Open Issues: 8
Watchers: 556
Default Branch: master
Created At: 2022-12-05T18:19:24Z
Updated At: 2024-12-24T03:15:35Z

README Content:
# gpt-commit-summarizer

See [announcement blogpost](https://medium.com/@knaan.harpaz/leverage-openais-language-model-for-automated-commit-summaries-8181cef30375?source=friends_link&sk=b71a6799548f52274d2d0888e9bfd97e).

Don't have time and want to get hacking right away? Check out the [Getting Started](#getting-started) section.

* [Getting Started](#getting-started)
* [Troubleshooting](#troubleshooting)
* [Encountered any bugs?](#encountered-any-bugs)

The `gpt-com

In [None]:
# Assuming you have a GitHub personal access token stored in the environment variable GITHUB_TOKEN
# If not, replace with your actual token.
# For example, if the token is stored in a variable 'token':
# github = Github(token)
g = Github() #For testing

def get_git_link(email):
    conn = sqlite3.connect('hackathon.db')
    cursor = conn.cursor()

    try:
        cursor.execute("SELECT git_link FROM submissions WHERE email = ?", (email,))
        result = cursor.fetchall()
        if result:
            print("\nAvailable Git Links for", email)
            for i, (link,) in enumerate(result):
                print(f"{i+1}. {link}")

            choice = int(input("Choose a link by number: "))
            if 1 <= choice <= len(result):
                selected_link = result[choice-1][0]
                print(f"Selected Link: {selected_link}")
                return selected_link
            else:
                print("Invalid choice")
                return None
        else:
            print(f"No Git links found for email: {email}")
            return None

    except sqlite3.Error as e:
        print(f"An error occurred: {e}")
        return None
    finally:
        conn.close()

def clone_repo(repo_url):
    try:
      repo_name = repo_url.split('/')[-1].replace('.git', '')  # Extract the repo name from the URL
      !git clone {repo_url} {repo_name}
      print(f"Repository cloned successfully to {repo_name} directory.")
      return repo_name
    except Exception as e:
      print(f"Error cloning repository: {e}")
      return None

clone_repo(repo_url)

Cloning into 'gpt-commit-summarizer'...
remote: Enumerating objects: 1227, done.[K
remote: Counting objects: 100% (107/107), done.[K
remote: Compressing objects: 100% (105/105), done.[K
remote: Total 1227 (delta 2), reused 2 (delta 2), pack-reused 1120 (from 2)[K
Receiving objects: 100% (1227/1227), 616.64 KiB | 13.41 MiB/s, done.
Resolving deltas: 100% (662/662), done.
Repository cloned successfully to gpt-commit-summarizer directory.


'gpt-commit-summarizer'

README File Summarization

In [None]:
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize_text(text):
    max_input_length = 3000
    truncated_text = text[:max_input_length]  # Truncate text to fit model input size
    summary = summarizer(truncated_text, max_length=200, min_length=30, do_sample=False)
    return summary[0]['summary_text']

# Function to load the content of a file
def load_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()

# Example usage:
file_content = load_file("/content/gpt-commit-summarizer/Readme.md")
summary = summarize_text(file_content)
print("Summary:\n", summary)


Device set to use cpu


Summary:
 The gpt-commit-summarizer GitHub Action is a powerful tool that harnesses the capabilities of OpenAI's text-davinci-003 large language model. It provides summaries of the changes introduced by a pull request in a repository. The action can greatly enhance collaboration and understanding in large codebases.


Feedback based on summarised readme content

In [None]:
def generate_feedback(summary):
    """
    Generate constructive feedback using a Hugging Face model.

    Args:
        summary (str): The summary of a README file.

    Returns:
        str: Constructive feedback on the summary.
    """
    # Load the model and pipeline
    model_name = "EleutherAI/gpt-neo-1.3B"  # Alternatively: 'google/flan-t5-large'
    feedback_pipeline = pipeline("text-generation", model=model_name, max_length=300,truncation=True,pad_token_id=50256)

    # Create the prompt
    prompt = f"""
    Based on the following summary of a README file, generate constructive feedback that includes:
    1. Positive aspects of the content.
    2. Suggestions for improvement or additional content that could enhance the clarity, structure, or usefulness of the document.

    Summary:
    {summary}
    """

    # Generate feedback
    response = feedback_pipeline(prompt, num_return_sequences=1, temperature=0.7)

    # Return the generated text
    return response[0]["generated_text"]

feedback = generate_feedback(summary)
print("Constructive Feedback:\n", feedback)

config.json:   0%|          | 0.00/1.35k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.31G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

Device set to use cpu


Constructive Feedback:
 
    Based on the following summary of a README file, generate constructive feedback that includes:
    1. Positive aspects of the content.
    2. Suggestions for improvement or additional content that could enhance the clarity, structure, or usefulness of the document.

    Summary:
    The gpt-commit-summarizer GitHub Action is a powerful tool that harnesses the capabilities of OpenAI's text-davinci-003 large language model. It provides summaries of the changes introduced by a pull request in a repository. The action can greatly enhance collaboration and understanding in large codebases.
    
    **NOTE**: This action requires a GitHub account, an account with at least 5 repos.
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    



In [None]:
# from transformers import pipeline
# import os

# def analyze_code(file_path, model_name="google/flan-t5-base"):
#     """
#     Analyze the provided code file to extract purpose, key functions, and dependencies.
#     Optimized for faster execution.
#     """
#     with open(file_path, "r") as file:
#         code_content = file.read()

#     # Limit the input length to the first 3000 characters
#     code_content = code_content[:3000]

#     # Initialize Hugging Face pipeline
#     summarizer = pipeline("text2text-generation", model=model_name, tokenizer=model_name, truncation=True)

#     # Define the prompt for analysis
#     prompt = f"""
#     Analyze the following code and provide a structured summary:

#     1. Purpose: Describe the overall purpose of the code or project.
#     2. Key Functions: Summarize the main functions or methods and their roles in the project.
#     3. Dependencies: List the external libraries or modules used in the code, and describe their significance.
#     4. Tools and Technologies used.

#     Code:
#     {code_content}

#     Provide the analysis in a structured format.
#     """

#     # Generate the analysis with optimized parameters
#     result = summarizer(prompt, max_length=300, truncation=True)
#     return result[0]["generated_text"]

# def process_repository(directory_path):
#     """
#     Process all Python files in the given directory for code analysis.
#     Optimized for faster execution.
#     """
#     for filename in os.listdir(directory_path):
#         file_path = os.path.join(directory_path, filename)
#         if os.path.isfile(file_path):
#             try:
#                 print(f"\n--- Analyzing {filename} ---\n")
#                 analysis = analyze_code(file_path)
#                 print(f"**Analysis for {filename}:**\n{analysis}\n")
#             except Exception as e:
#                 print(f"Error analyzing {filename}: {e}")

# # Provide the path to the repository or code directory
# repo_directory = "/content/gpt-commit-summarizer"

# if os.path.exists(repo_directory):
#     process_repository(repo_directory)
# else:
#     print(f"Error: Directory '{repo_directory}' not found.")

SCORING ASSISTANCE

In [16]:
def analyze_code(file_path, model_name="google/flan-t5-base"):
    """
    Analyze the provided code file to extract purpose, key functions, and dependencies.
    """
    with open(file_path, "r") as file:
        code_content = file.read()

    # Truncate input to reduce processing time
    # code_content = code_content[:3000]

    # Initialize the Hugging Face pipeline
    summarizer = pipeline("text2text-generation", model=model_name, tokenizer=model_name, truncation=True)

    # Define the prompt for analysis
    prompt = f"""
    Analyze the following code to provide concise summaries for judges:

    1. Purpose: Describe the overall purpose of the code or project.
    2. Key Functions: Summarize the main functions or methods and their roles in the project.
    3. Dependencies: List the external libraries or modules used in the code, and describe their significance.
    4. Tools and Technologies used.

    Code:
    {code_content}

    Provide the analysis in a structured format.
    """

    # Generate the analysis
    result = summarizer(prompt, max_length=400, truncation=True)
    return result[0]["generated_text"]

def score_submission(analysis_text, model_name="google/flan-t5-base"):
    """
    Score the combined submission based on the analysis provided.
    """
    # Initialize the Hugging Face pipeline
    scorer = pipeline("text2text-generation", model=model_name, tokenizer=model_name, truncation=True)

    # Define the scoring prompt
    scoring_prompt = f"""
    Based on the following project analysis, assign scores on a scale of 1 to 10 for the following criteria:
    1. Key Project Features: Are the project's features well-defined, relevant, and complete?
    2. Problem-Statement Adherence: Does the project address the stated problem effectively?
    3. Innovation: How innovative or unique is the solution or approach?
    4. Feasibility: How practical and implementable is the project?
    5. Impact: How significant is the potential impact or usefulness of the project?

    For each aspect, also provide a justification for the score given, explaining why the score was assigned.


    Analysis:
    {analysis_text}

    Provide the scores in the format:
    Key Project Features: X/10
    Problem-Statement Adherence: Y/10
    Innovation: Z/10
    Feasibility: W/10
    Impact: V/10
    """

    # Generate the scores
    result = scorer(scoring_prompt, max_length=200, truncation=True)
    # return result[0]["generated_text"]
    return result

def process_repository_and_score(directory_path, model_name="google/flan-t5-base"):
    """
    Process all files in the given directory and its subdirectories, analyze them, and generate a combined score.
    """
    combined_analysis = ""

    # Walk through all directories and files in the root directory and subdirectories
    for root, dirs, files in os.walk(directory_path):
        for filename in files:
            file_path = os.path.join(root, filename)
            if filename.endswith(('.py', '.ts', '.js','.json')):
              try:
                  print(f"\n--- Analyzing {filename} ---\n")
                  # Get the analysis of the current file
                  analysis = analyze_code(file_path, model_name)
                  combined_analysis += f"\n--- Analysis of {filename} ---\n{analysis}\n"
              except Exception as e:
                  print(f"Error analyzing {filename}: {e}")

    if combined_analysis:
        # After processing all files, score the combined analysis
        print("\n--- Scoring Combined Analysis ---\n")
        score = score_submission(combined_analysis, model_name)
        print(f"**Overall Scores for All Files:**\n{score}")
    else:
        print("No files found in the directory.")

# Provide the path to the repository or code directory
repo_directory = "/content/gpt-commit-summarizer"  # Update this to your specific directory

if os.path.exists(repo_directory):
    process_repository_and_score(repo_directory)
else:
    print(f"Error: Directory '{repo_directory}' not found.")


--- Analyzing tsconfig.json ---



config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Device set to use cpu



--- Analyzing package.json ---



Device set to use cpu



--- Analyzing .eslintrc.js ---



Device set to use cpu



--- Analyzing .prettierrc.json ---



Device set to use cpu



--- Analyzing index.js ---



Device set to use cpu



--- Analyzing filesSummary.ts ---



Device set to use cpu



--- Analyzing openAi.ts ---



Device set to use cpu



--- Analyzing octokit.ts ---



Device set to use cpu



--- Analyzing summarizePr.ts ---



Device set to use cpu



--- Analyzing sharedPrompt.ts ---



Device set to use cpu



--- Analyzing DiffMetadata.ts ---



Device set to use cpu



--- Analyzing commitSummary.ts ---



Device set to use cpu



--- Analyzing index.ts ---



Device set to use cpu



--- Scoring Combined Analysis ---



Device set to use cpu


**Overall Scores for All Files:**
[{'generated_text': '1'}]


END-------SCORING ASSISTANCE-----END

In [None]:
!pip install selenium

In [None]:
!pip install fake-useragent

In [None]:
import requests
from bs4 import BeautifulSoup
import json
import random
import time
from fake_useragent import UserAgent
import re

class LinkedInPublicScraper:
    def __init__(self):
        """Initialize the scraper with rotating user agents"""
        self.ua = UserAgent()
        self.session = requests.Session()

    def get_headers(self):
        """Generate random headers for each request"""
        return {
            'User-Agent': self.ua.random,
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'DNT': '1',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        }

    def scrape_profile(self, profile_url):
        """
        Scrape public LinkedIn profile
        Args:
            profile_url: Public LinkedIn profile URL
        Returns:
            dict: Profile information
        """
        try:
            # Add random delay to avoid detection
            time.sleep(random.uniform(1, 3))

            # Make request
            response = self.session.get(
                profile_url,
                headers=self.get_headers(),
                timeout=10
            )

            if response.status_code != 200:
                return {
                    "status": "error",
                    "message": f"Failed to fetch profile. Status code: {response.status_code}"
                }

            # Parse HTML
            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract profile data
            profile_data = {
                "status": "success",
                "profile_url": profile_url,
                "data": {
                    "basic_info": self._extract_basic_info(soup),
                    "about": self._extract_about(soup),
                    "experience": self._extract_experience(soup),
                    "education": self._extract_education(soup),
                    "skills": self._extract_skills(soup),
                    "certifications": self._extract_certifications(soup)
                }
            }

            # Save to file
            self._save_to_json(profile_data)

            return profile_data

        except requests.exceptions.RequestException as e:
            return {
                "status": "error",
                "message": f"Network error: {str(e)}"
            }
        except Exception as e:
            return {
                "status": "error",
                "message": f"Scraping error: {str(e)}"
            }

    def _extract_basic_info(self, soup):
        """Extract basic profile information"""
        basic_info = {}
        try:
            # Name (using common LinkedIn class patterns)
            name_elem = soup.find('h1', {'class': re.compile(r'text-heading-xlarge.*')})
            if name_elem:
                basic_info['name'] = name_elem.get_text().strip()

            # Headline
            headline_elem = soup.find('div', {'class': re.compile(r'text-body-medium.*')})
            if headline_elem:
                basic_info['headline'] = headline_elem.get_text().strip()

            # Location
            location_elem = soup.find('span', {'class': re.compile(r'text-body-small.*location.*')})
            if location_elem:
                basic_info['location'] = location_elem.get_text().strip()

        except Exception as e:
            basic_info['error'] = str(e)

        return basic_info

    def _extract_about(self, soup):
        """Extract about section"""
        try:
            about_section = soup.find('div', {'class': re.compile(r'.*about-section.*')})
            if about_section:
                return about_section.get_text().strip()
        except:
            pass
        return ""

    def _extract_experience(self, soup):
        """Extract work experience"""
        experiences = []
        try:
            exp_section = soup.find('div', {'id': re.compile(r'.*experience.*')})
            if exp_section:
                exp_items = exp_section.find_all('li', {'class': re.compile(r'.*experience-item.*')})
                for item in exp_items:
                    exp = {}
                    title_elem = item.find('h3', {'class': re.compile(r'.*t-16.*')})
                    if title_elem:
                        exp['title'] = title_elem.get_text().strip()
                    company_elem = item.find('p', {'class': re.compile(r'.*company-name.*')})
                    if company_elem:
                        exp['company'] = company_elem.get_text().strip()
                    duration_elem = item.find('span', {'class': re.compile(r'.*date-range.*')})
                    if duration_elem:
                        exp['duration'] = duration_elem.get_text().strip()
                    experiences.append(exp)
        except:
            pass
        return experiences

    def _extract_education(self, soup):
        """Extract education information"""
        education = []
        try:
            edu_section = soup.find('section', {'id': re.compile(r'.*education.*')})
            if edu_section:
                edu_items = edu_section.find_all('li', {'class': re.compile(r'.*education.*')})
                for item in edu_items:
                    edu = {}
                    school_elem = item.find('h3', {'class': re.compile(r'.*school-name.*')})
                    if school_elem:
                        edu['school'] = school_elem.get_text().strip()
                    degree_elem = item.find('span', {'class': re.compile(r'.*degree-name.*')})
                    if degree_elem:
                        edu['degree'] = degree_elem.get_text().strip()
                    education.append(edu)
        except:
            pass
        return education

    def _extract_skills(self, soup):
        """Extract skills"""
        skills = []
        try:
            skills_section = soup.find('section', {'class': re.compile(r'.*skills.*')})
            if skills_section:
                skill_items = skills_section.find_all('span', {'class': re.compile(r'.*skill-name.*')})
                skills = [skill.get_text().strip() for skill in skill_items]
        except:
            pass
        return skills

    def _extract_certifications(self, soup):
        """Extract certifications"""
        certifications = []
        try:
            cert_section = soup.find('section', {'id': re.compile(r'.*certifications.*')})
            if cert_section:
                cert_items = cert_section.find_all('li', {'class': re.compile(r'.*certification.*')})
                for item in cert_items:
                    cert = {}
                    name_elem = item.find('h3', {'class': re.compile(r'.*t-16.*')})
                    if name_elem:
                        cert['name'] = name_elem.get_text().strip()
                    issuer_elem = item.find('p', {'class': re.compile(r'.*certifier.*')})
                    if issuer_elem:
                        cert['issuer'] = issuer_elem.get_text().strip()
                    certifications.append(cert)
        except:
            pass
        return certifications

    def _save_to_json(self, data):
        """Save scraped data to JSON file"""
        try:
            with open('linkedin_profile.json', 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
        except Exception as e:
            print(f"Error saving to JSON: {e}")

def scrape_public_profile(profile_url):
    """Main function to scrape public LinkedIn profile"""
    scraper = LinkedInPublicScraper()
    return scraper.scrape_profile(profile_url)

# Example usage
if __name__ == "__main__":
    url = input("Enter public LinkedIn profile URL: ")
    result = scrape_public_profile(url)
    print("\nScraped Profile Data:")
    print(json.dumps(result, indent=2))

Enter public LinkedIn profile URL: https://www.linkedin.com/in/v-abinaya/

Scraped Profile Data:
{
  "status": "error",
  "message": "Failed to fetch profile. Status code: 999"
}
