In [None]:
# Link Detection System

import re
import pandas as pd

def detect_links(text):
    url_pattern = r'https?://[^\s]+'
    matches = re.findall(url_pattern, text)
    return matches

def classify_link(link):
    if 'github.com' in link:
        return 'GitHub'
    elif 'drive.google.com' in link:
        return 'Google Drive'
    elif 'dropbox.com' in link:
        return 'Dropbox'
    elif 'linkedin.com' in link:
        return 'LinkedIn'
    elif 'youtube.com' in link:
        return 'YouTube'
    else:
        return 'Other'

def process_resume_text(text):
    links = detect_links(text)
    return pd.DataFrame({
        'link': links,
        'platform': [classify_link(link) for link in links]
    })

# Example usage
sample_resume = """
Check out my GitHub at https://github.com/username/project
My drive folder is here: https://drive.google.com/drive/folders/sample
"""
link_df = process_resume_text(sample_resume)
print(link_df)

                                            link      platform
0            https://github.com/username/project        GitHub
1  https://drive.google.com/drive/folders/sample  Google Drive


Module 1: GitHub Profile Crawler

In [None]:
from github import Github

def get_repositories(username, token):
    g = Github(token)
    user = g.get_user(username)
    return [repo.name for repo in user.get_repos()]


Module 2: Repo Metrics Extractor


In [None]:
def extract_repo_metrics(username, token):
    g = Github(token)
    user = g.get_user(username)
    data = []
    for repo in user.get_repos():
        data.append({
            'repo': repo.name,
            'stars': repo.stargazers_count,
            'forks': repo.forks_count,
            'language': repo.language,
            'topics': repo.get_topics()
        })
    return pd.DataFrame(data)


Module 3: Commit and Code Analysis

In [None]:
def get_commit_stats(repo):
    commits = repo.get_commits()
    return {
        'total_commits': commits.totalCount,
        'recent_commit': commits[0].commit.message if commits.totalCount > 0 else 'None'
    }


Module 4: Repo Strength Scorer (Simple Heuristic)

In [None]:
def score_repository(stars, forks, commits):
    return stars * 0.4 + forks * 0.3 + commits * 0.3  # Simple scoring weights

def assess_repos(df, commit_data):
    df['commit_count'] = commit_data['commits']
    df['score'] = df.apply(lambda row: score_repository(row['stars'], row['forks'], row['commit_count']), axis=1)
    return df.sort_values(by='score', ascending=False)


code originality


In [1]:
import difflib
import ast
import re

def normalize_code(code):
    # Remove comments, extra spaces, etc.
    code = re.sub(r'#.*', '', code)  # remove single-line comments
    code = re.sub(r'\s+', ' ', code)  # normalize whitespace
    return code.strip()

def plagiarism_score(code1, code2):
    code1 = normalize_code(code1)
    code2 = normalize_code(code2)
    matcher = difflib.SequenceMatcher(None, code1, code2)
    return matcher.ratio() * 100  # percentage similarity

def extract_code_structure(code):
    try:
        tree = ast.parse(code)
        return [type(node).__name__ for node in ast.walk(tree)]
    except SyntaxError:
        return []

def compare_ast_structure(structure1, structure2):
    overlap = set(structure1).intersection(set(structure2))
    total = set(structure1).union(set(structure2))
    return len(overlap) / len(total) * 100 if total else 0

def detect_ai_style(code):
    # Basic heuristic: check for patterns commonly produced by LLMs
    patterns = [
        "This function", "The purpose of this", "Args:", "Returns:", "Example:", "Usage:"
    ]
    for pattern in patterns:
        if pattern.lower() in code.lower():
            return True
    return False

def assess_code_originality(submitted_code, reference_code_list):
    result = {
        'ai_signature_detected': detect_ai_style(submitted_code),
        'highest_plagiarism_score': 0,
        'closest_match_repo': None,
        'ast_similarity': 0
    }

    for name, ref_code in reference_code_list.items():
        plag_score = plagiarism_score(submitted_code, ref_code)
        ast_sim = compare_ast_structure(
            extract_code_structure(submitted_code),
            extract_code_structure(ref_code)
        )
        if plag_score > result['highest_plagiarism_score']:
            result['highest_plagiarism_score'] = plag_score
            result['closest_match_repo'] = name
        if ast_sim > result['ast_similarity']:
            result['ast_similarity'] = ast_sim

    return result

# 🧪 Example usage
if __name__ == "__main__":
    submitted_code = '''
    def add(x, y):
        # This function returns sum
        return x + y
    '''

    reference_code_list = {
        'utils_repo': 'def sum_func(a, b): return a + b',
        'ai_code_gen': 'def add(x, y): # This function returns sum return x + y'
    }

    originality_result = assess_code_originality(submitted_code, reference_code_list)
    print(originality_result)


{'ai_signature_detected': True, 'highest_plagiarism_score': 68.29268292682927, 'closest_match_repo': 'ai_code_gen', 'ast_similarity': 0}
