In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib

In [None]:
pip install PyPDF2

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/232.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━[0m [32m153.6/232.6 kB[0m [31m5.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [10]:
import os
import time
import google.generativeai as genai
from google.colab import drive, userdata
# Configure the Gemini API
import csv

genai.configure(api_key="your google api key")

drive.mount('/content/drive')

def upload_to_gemini(path, mime_type=None):
    """Uploads the given file to Gemini."""
    file = genai.upload_file(path, mime_type=mime_type)
    print(f"Uploaded file '{file.display_name}' as: {file.uri}")
    return file

def wait_for_files_active(files):
    """Waits for the given files to be active."""
    print("Waiting for file processing...")
    for name in (file.name for file in files):
        file = genai.get_file(name)
        while file.state.name == "PROCESSING":
            print(".", end="", flush=True)
            time.sleep(10)
            file = genai.get_file(name)
        if file.state.name != "ACTIVE":
            raise Exception(f"File {file.name} failed to process")
    print("...all files ready")
    print()

# Model configuration
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-exp",
    generation_config=generation_config,
    system_instruction=(
        "You are an expert at evaluating research papers. Judge if the given paper is publishable or not. "
        "In the output, give the PDF name along with the publishability label (0 for non-publishable, "
        "1 for publishable). The paper’s content, methodology, and findings should correspond to the themes, "
        "focus areas, academic criteria, and quality standards of prestigious conferences. A non-publishable "
        "paper may contain irrelevant, nonsensical, or contradictory content.ONLY GIVE THE OUTPUT AS 0 OR 1"
    ),
)

# Specify the folder containing the files
folder_path = "/content/drive/MyDrive/final_ref"

#with zipfile.ZipFile(folder_path, 'r') as zip_ref:
#    zip_ref.extractall("/tmp/papers")  # Extract to /tmp/papers

#folder_path = "/tmp/papers"

# Get all files from the folder
file_paths = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith(".pdf")]

def save_results_to_csv(results, output_csv="evaluation_results.csv"):
    """Saves the evaluation result to a CSV file."""
    with open(output_csv, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file,fieldnames=["Paper ID", "Publishable"])
        writer.writeheader()
        writer.writerows(results)
    print(f"Results saved to {output_csv}")

# Process files one by one
results = []
#file_paths=['/content/R003.pdf']
for file_path in file_paths:
    print(f"Processing file: {file_path}")
    try:
        # Upload the file to Gemini
        file = upload_to_gemini(file_path, mime_type="application/pdf")

        # Wait for the file to be ready
        wait_for_files_active([file])

        # Start a chat session and send the file for evaluation
        chat_session = model.start_chat(
            history=[
                {"role": "user", "parts": [file]},
            ]
        )
        response = chat_session.send_message("Please evaluate this paper.Is it Publishable or Not?")

        # Append the result
        paper_name=file.display_name
        publishable=response.text.strip()
        eval_result={"Paper ID":paper_name,"Publishable":publishable}
        results.append(eval_result)
        #print(f"{file.display_name}: {response.text}")

    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        results.append(f"{file_path}: Error during processing")


# Print the results
print("\nEvaluation Results:")
for result in results:
    print(result)

save_results_to_csv(results)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Processing file: /content/drive/MyDrive/final_ref/R001.pdf
Uploaded file 'R001.pdf' as: https://generativelanguage.googleapis.com/v1beta/files/yjz4qntmorph
Waiting for file processing...
...all files ready

Processing file: /content/drive/MyDrive/final_ref/R007.pdf
Uploaded file 'R007.pdf' as: https://generativelanguage.googleapis.com/v1beta/files/bhte19ps9ofx
Waiting for file processing...
...all files ready

Processing file: /content/drive/MyDrive/final_ref/R014.pdf
Uploaded file 'R014.pdf' as: https://generativelanguage.googleapis.com/v1beta/files/wr8u7yqcjgwf
Waiting for file processing...
...all files ready

Processing file: /content/drive/MyDrive/final_ref/R013.pdf
Uploaded file 'R013.pdf' as: https://generativelanguage.googleapis.com/v1beta/files/pw1ij2fe9llp
Waiting for file processing...
...all files ready

Processing file: /content/drive/MyDrive/fin

In [12]:
from sklearn.metrics import accuracy_score, f1_score

# Define ground truth labels for the 15 reference papers
# Replace these with your actual ground truth labels
ground_truth_labels = {
    "R001.pdf": 0,
    "R002.pdf": 0,
    "R003.pdf": 0,
    "R004.pdf": 0,
    "R005.pdf": 0,
    "R006.pdf": 1,
    "R007.pdf": 1,
    "R008.pdf": 1,
    "R009.pdf": 1,
    "R010.pdf": 1,
    "R011.pdf": 1,
    "R012.pdf": 1,
    "R013.pdf": 1,
    "R014.pdf": 1,
    "R015.pdf": 1
}
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Extract ground truth and predictions
true_labels = []
predicted_labels = []

for result in results:
    if isinstance(result, dict):  # Ignore errors in the results
        paper_id = result["Paper ID"]
        if paper_id in ground_truth_labels:
            true_labels.append(ground_truth_labels[paper_id])
            predicted_labels.append(int(result["Publishable"]))  # Convert to int

# Calculate accuracy and F1 score
accuracy = accuracy_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels)

# Print metrics
print("\nEvaluation Metrics:")
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"F1 Score: {f1:.2f}")

# Optional: Detailed classification report
print("\nClassification Report:")
print(classification_report(true_labels, predicted_labels, target_names=["Non-Publishable", "Publishable"]))





Evaluation Metrics:
Accuracy: 93.33%
F1 Score: 0.95

Classification Report:
                 precision    recall  f1-score   support

Non-Publishable       1.00      0.80      0.89         5
    Publishable       0.91      1.00      0.95        10

       accuracy                           0.93        15
      macro avg       0.95      0.90      0.92        15
   weighted avg       0.94      0.93      0.93        15



In [13]:
import os
import time
import google.generativeai as genai
from google.colab import drive, userdata
# Configure the Gemini API
import csv

genai.configure(api_key="AIzaSyAbAPEDYFqAAc3f09Ow5OIscgx8Y5OZ_74")

drive.mount('/content/drive')

def upload_to_gemini(path, mime_type=None):
    """Uploads the given file to Gemini."""
    file = genai.upload_file(path, mime_type=mime_type)
    print(f"Uploaded file '{file.display_name}' as: {file.uri}")
    return file

def wait_for_files_active(files):
    """Waits for the given files to be active."""
    print("Waiting for file processing...")
    for name in (file.name for file in files):
        file = genai.get_file(name)
        while file.state.name == "PROCESSING":
            print(".", end="", flush=True)
            time.sleep(10)
            file = genai.get_file(name)
        if file.state.name != "ACTIVE":
            raise Exception(f"File {file.name} failed to process")
    print("...all files ready")
    print()

# Model configuration
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-exp",
    generation_config=generation_config,
    system_instruction=(
        "You are an expert at evaluating research papers. Judge if the given paper is publishable or not. "
        "In the output, give the PDF name along with the publishability label (0 for non-publishable, "
        "1 for publishable). The paper’s content, methodology, and findings should correspond to the themes, "
        "focus areas, academic criteria, and quality standards of prestigious conferences. A non-publishable "
        "paper may contain irrelevant, nonsensical, or contradictory content.ONLY GIVE THE OUTPUT AS 0 OR 1"
    ),
)

# Specify the folder containing the files
folder_path = "/content/drive/MyDrive/Papers-20250112T180905Z-001/Papers"

#with zipfile.ZipFile(folder_path, 'r') as zip_ref:
#    zip_ref.extractall("/tmp/papers")  # Extract to /tmp/papers

#folder_path = "/tmp/papers"

# Get all files from the folder
file_paths = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith(".pdf")]

def save_results_to_csv(results, output_csv="evaluation_results.csv"):
    """Saves the evaluation result to a CSV file."""
    with open(output_csv, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file,fieldnames=["Paper ID", "Publishable"])
        writer.writeheader()
        writer.writerows(results)
    print(f"Results saved to {output_csv}")

# Process files one by one
results = []
#file_paths=['/content/R003.pdf']
for file_path in file_paths:
    print(f"Processing file: {file_path}")
    try:
        # Upload the file to Gemini
        file = upload_to_gemini(file_path, mime_type="application/pdf")

        # Wait for the file to be ready
        wait_for_files_active([file])

        # Start a chat session and send the file for evaluation
        chat_session = model.start_chat(
            history=[
                {"role": "user", "parts": [file]},
            ]
        )
        response = chat_session.send_message("Please evaluate this paper.Is it Publishable or Not?")

        # Append the result
        paper_name=file.display_name
        publishable=response.text.strip()
        eval_result={"Paper ID":paper_name,"Publishable":publishable}
        results.append(eval_result)
        #print(f"{file.display_name}: {response.text}")

    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        results.append(f"{file_path}: Error during processing")


# Print the results
print("\nEvaluation Results:")
for result in results:
    print(result)

save_results_to_csv(results)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Processing file: /content/drive/MyDrive/Papers-20250112T180905Z-001/Papers/P095.pdf
Uploaded file 'P095.pdf' as: https://generativelanguage.googleapis.com/v1beta/files/wjx9yxwvi2ip
Waiting for file processing...
...all files ready

Processing file: /content/drive/MyDrive/Papers-20250112T180905Z-001/Papers/P012.pdf
Uploaded file 'P012.pdf' as: https://generativelanguage.googleapis.com/v1beta/files/yj5hyovwid6h
Waiting for file processing...
...all files ready

Processing file: /content/drive/MyDrive/Papers-20250112T180905Z-001/Papers/P014.pdf
Uploaded file 'P014.pdf' as: https://generativelanguage.googleapis.com/v1beta/files/9rs8gptjb5za
Waiting for file processing...
...all files ready

Processing file: /content/drive/MyDrive/Papers-20250112T180905Z-001/Papers/P052.pdf
Uploaded file 'P052.pdf' as: https://generativelanguage.googleapis.com/v1beta/files/u7129tb

In [20]:
import os
import shutil
import pandas as pd

# Paths
input_folder = "/content/drive/MyDrive/Papers-20250112T180905Z-001/Papers"
output_folder = "/content/drive/MyDrive/Publishable"
csv_file_path = "/content/papers_evaluation.csv"  # Update to actual CSV file path if different

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Read the CSV file
df = pd.read_csv(csv_file_path)

# Filter the Paper IDs with Publishable = 1
publishable_papers = df[df['Publishable'] == 1]['Paper ID']

# Copy files to the new folder
for paper_id in publishable_papers:
    source_path = os.path.join(input_folder, paper_id)
    destination_path = os.path.join(output_folder, paper_id)

    if os.path.exists(source_path):
        shutil.copy(source_path, destination_path)
        print(f"Copied {paper_id} to {output_folder}")
    else:
        print(f"File not found: {paper_id}")

print(f"All publishable papers have been copied to {output_folder}.")

Copied P001.pdf to /content/drive/MyDrive/Publishable
Copied P004.pdf to /content/drive/MyDrive/Publishable
Copied P005.pdf to /content/drive/MyDrive/Publishable
Copied P007.pdf to /content/drive/MyDrive/Publishable
Copied P008.pdf to /content/drive/MyDrive/Publishable
Copied P009.pdf to /content/drive/MyDrive/Publishable
Copied P010.pdf to /content/drive/MyDrive/Publishable
Copied P011.pdf to /content/drive/MyDrive/Publishable
Copied P012.pdf to /content/drive/MyDrive/Publishable
Copied P013.pdf to /content/drive/MyDrive/Publishable
Copied P014.pdf to /content/drive/MyDrive/Publishable
Copied P015.pdf to /content/drive/MyDrive/Publishable
Copied P016.pdf to /content/drive/MyDrive/Publishable
Copied P017.pdf to /content/drive/MyDrive/Publishable
Copied P018.pdf to /content/drive/MyDrive/Publishable
Copied P019.pdf to /content/drive/MyDrive/Publishable
Copied P021.pdf to /content/drive/MyDrive/Publishable
Copied P023.pdf to /content/drive/MyDrive/Publishable
Copied P024.pdf to /content/