In [24]:
import os
from dotenv import load_dotenv
import google.generativeai as genai
import pandas as pd

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Read the API key from the environment and register it with the SDK.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=GOOGLE_API_KEY)

# Shared model handle used by the classification loop.
model = genai.GenerativeModel('gemini-1.5-flash')

In [25]:
def build_prompt(code_snippet):
    """Return the classification prompt for one code snippet.

    The prompt text is sent to the model verbatim, so its wording and
    whitespace are kept unchanged.
    """
    return f"""
    Classify the following Python code as '0' for clean or '1' for code smell based on the following criteria. Be strict so that atleast 40 percent of code is smelly:
    1. High levels of nesting in conditional statements or loops.
    2. Code repetition or duplicated logic.
    3. Long functions that do too many things.
    4. Lack of clear variable and function names.
    5. Any other factors that make the code hard to understand, maintain, or extend.

    Please return just '0' or '1'. The code is:

    {code_snippet}
    """


def parse_label(raw_text):
    """Normalize a model reply to '0' or '1'; return None if it is neither.

    The prompt shows the labels wrapped in quotes, so a reply such as "'1'"
    is accepted by stripping surrounding whitespace, quotes and backticks.
    """
    label = raw_text.strip().strip("'\"`").strip()
    return label if label in ("0", "1") else None


def classify_snippet(code_snippet):
    """Ask the model to label one snippet.

    Returns '0' or '1', or None when the reply has no usable text or is not
    one of the two expected labels. API/transport errors raised by
    `generate_content` are not caught here and propagate to the caller.
    """
    response = model.generate_content(build_prompt(code_snippet))
    try:
        raw_text = response.text
    except ValueError as exc:
        # The SDK's `.text` accessor raises ValueError when the response holds
        # no text part (for example a blocked reply). Record a missing label
        # instead of aborting the whole run.
        print(f"No usable model reply ({exc}); leaving label empty.")
        return None

    label = parse_label(raw_text)
    if label is None:
        print(f"Unexpected model reply {raw_text!r}; leaving label empty.")
    return label


# Work on the first 10 rows only. `.copy()` makes the sample an independent
# frame, so adding a column below never writes into a slice of the full data.
data = pd.read_csv('dataset.csv').head(10).copy()

# An explicit loop (rather than a comprehension) keeps partial results in
# `results` if an API error interrupts the run.
results = []
for code_snippet in data['code']:
    results.append(classify_snippet(code_snippet))

data['classification'] = results

In [26]:
# Render the DataFrame, including its `classification` column, as this cell's output.
data

Unnamed: 0,code,classification
0,"n, m = map(int,input().split())\ncount = 0\nl ...",1
1,"MOD=10**9+7\nUPPERLIMIT=2*10**5\nMODMUL=[1, 1]...",1
2,"T,X=map(int,input().split())\nres = T/X\nprint...",0
3,def is_prime(n):\n if n == 2: return True\n ...,0
4,a = input()\na = 48 - a\nprint a,0
5,"n, k = map(int, input().split())\n\ndef cul(x)...",1
6,"a = list(map(int, input().split()))\ncount = 0...",1
7,# -*- coding: utf-8 -*-\n\n\ndef main():\n ...,0
8,"def dfs(parent, connect, passed, N): #深さ優先探索\n...",0
9,import math\narr = input().split( )\na = int(a...,0


In [27]:
# Write the labeled rows to disk; the DataFrame index is not written as a column.
labeled_csv_path = 'dataset_labeled.csv'
data.to_csv(labeled_csv_path, index=False)