In [1]:
import google.generativeai as genai
import pandas as pd
import numpy as np
import re
from collections import Counter
import random
import math
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import root_mean_squared_error
from sklearn.metrics import classification_report

In [2]:
def extract_first_number(s):
    """
    Extracts the first integer from a string, ignoring decimals.

    Args:
        s: The input string.

    Returns:
        The first integer found in the string, or None if no integer is found.
    """
    match = re.search(r'\d+', s)  # Matches only integers
    if match:
        return int(match.group())
    else:
        return None


## Find modes ###
def find_mode(data):
    """
    Finds the mode(s) of a list or pandas Series.
    
    Args:
        data: A list or pandas Series of values.
    
    Returns:
        A single mode if there are multiple modes;
        otherwise returns the mode(s) of the input data.
        Returns an empty list if the input is empty.
    """
    counts = Counter(data)
    max_count = max(counts.values())
    modes = [key for key, value in counts.items() if value == max_count]
    
    # If there are multiple modes, choose one randomly
    if len(modes) > 1:
        return random.choice(modes)
    
    return modes[0]

def SMD(truevalues,prediction):
    mt=truevalues.mean()
    mp=prediction.mean()
    stdt=truevalues.var()
    stdp=prediction.var()
    z= (mt-mp)/math.sqrt((stdt+stdt)/2)

    return(z)

In [3]:
path = 'C:/'
role = "You are a helpful rater in science education. You should rate students' responses based on the scoring guide and only return the scores.\n"

item='VR2'
scoring_prompt = pd.read_csv(path+'scoring prompts.csv')
scoring_prompt = scoring_prompt.loc[scoring_prompt.Item == item,'Prompt'].values[0]
resp = pd.read_excel(path+'All Graded Data from LPS3 2022_ Undergraduates.xlsx',sheet_name=item)
resp.replace({'Final': {'0a': 0}},inplace=True)
resp.replace({'Final': {'0b': 0}},inplace=True)
#resp['Final']=resp['Final'].astype(int)


  resp.replace({'Final': {'0b': 0}},inplace=True)


In [5]:
genai.configure(api_key="Input your own key")
gm_model = genai.GenerativeModel(model_name="gemini-2.0-flash")

In [6]:
temprs = [0,1]
nrep=5

for tempr in temprs:
    scP = []
    for index, row in resp.iterrows():
    #print(index)
        scoring_prompt_resp = scoring_prompt+'\n'+'RESPONSE:'+str(row[item])+'. ->'
        scL=[]
        for rp in range(nrep):
        
            gm_scores = gm_model.generate_content(
                        generation_config={"temperature": tempr},
                        contents=role+'\n'+scoring_prompt_resp,
            )
            scL.append(gm_scores.text)

        scP.append(scL)

    gm_scD = pd.DataFrame(scP,columns=['output_'+str(i )for i in range(nrep)])

    gm_scD = pd.concat([gm_scD,gm_scD.apply(lambda col: [extract_first_number(i) for i in col]).\
    rename(columns=dict(zip(['output_'+str(i )for i in range(nrep)],['score_'+str(i )for i in range(nrep)])))], axis=1)

    gm_scD['mode'] = gm_scD.iloc[:,nrep:(2*nrep)].apply(find_mode,axis=1)

    gm_scD.to_csv(path+'results/'+item+'_gemini_tempr'+str(tempr)+'.csv')



In [7]:
print(f"QWK: {cohen_kappa_score(gm_scD['mode'],resp.Final,weights='quadratic')}")
print(f"Accuracy: {accuracy_score(gm_scD['mode'],resp.Final)}")
print(f"rmse: {root_mean_squared_error(gm_scD['mode'],resp.Final)}")
print(f"SMD: {SMD(prediction=gm_scD['mode'],truevalues=resp.Final)}")
print(classification_report(gm_scD['mode'],resp.Final))

QWK: 0.7941068320545192
Accuracy: 0.788546255506608
rmse: 0.4944627307193987
SMD: -0.07571254023554079
              precision    recall  f1-score   support

           0       0.69      0.87      0.77        55
           1       0.64      0.60      0.62       109
           2       0.88      0.87      0.87       272
           3       0.62      0.44      0.52        18

    accuracy                           0.79       454
   macro avg       0.71      0.70      0.69       454
weighted avg       0.79      0.79      0.79       454

