In [1]:
%pip install -q -U google-generativeai
import google.generativeai as genai
import os

Note: you may need to restart the kernel to use updated packages.


  from .autonotebook import tqdm as notebook_tqdm


In [9]:
import pandas as pd

def complete_with_gemini(csv_filename, output_filename):
    """
    Adds a 'gender' column to the input CSV file by querying the Google Cloud API based on the name and location.

    Args:
        csv_filename (str): The input CSV file path.
        output_filename (str): The output CSV file path with the new 'gender' column.

    Returns:
        pd.DataFrame: The updated DataFrame with the 'gender' column.
    """

    os.environ["GEMINI_API_KEY"] = 'AIzaSyBw0iXSqYTIWT4ZUMbbe7_-4wdA_P8CYJI'
    genai.configure(api_key=os.environ["GEMINI_API_KEY"])

    # Read the CSV file
    df = pd.read_csv(csv_filename)

    model = genai.GenerativeModel("gemini-1.5-pro")

    # Add a 'gender' column to the DataFrame
    genders = []
    for _, row in df.iterrows():
        name = row['name']
        location = row.get('location', '')  # Default to an empty string if 'location' is missing

        # Construct the prompt
        prompt = f"""
        Given the name '{name}' and, if not none, the location '{location}',
        tell me the gender of the username.
        - The input is a name and maybe a location too.
        - The output must be 'Male', 'Female', or
        'Unknown'. Without any explanations or reasons. Just the predicted gender.
        - Examples:
          - Christopher Pirillo: Male
          - Vera AxelRod: Female
          - Vish Ganapathy: Unknown
        """

        try:

            generation_config = {
              "temperature": 1,
              "top_p": 0.95,
              "top_k": 64,
              "max_output_tokens": 8192,
              "response_mime_type": "text/plain",
              }

            response = model.generate_content(prompt, generation_config=generation_config)
            #print(response.text)

            # Parse the result
            result = response.text
            genders.append(result)

        except Exception as e:
            print(f"Error processing {name}: {e}")
            genders.append("Error")  # Append "Error" in case of failure

    # Add the genders to the DataFrame
    df['gender'] = genders

    # Save the updated DataFrame to a new CSV file
    df.to_csv(output_filename, index=False)

    return df

In [10]:
import os
os.makedirs('complete_organisations', exist_ok=True)

# Define source and destination folders
input_folder = '/home/maria/Escriptori/I2RCED/i2rced/organisations/organisations_with_location'
output_folder = '/home/maria/Escriptori/I2RCED/i2rced/organisations/complete_organisations'

for file_name in os.listdir(input_folder):
    if file_name.endswith('.csv'):
        input_file = os.path.join(input_folder, file_name)
        output_file = os.path.join(output_folder, f'{file_name}_complete.csv')
            
        complete_with_gemini(input_file, output_file)
        print(f"Processed: {file_name}")

Processed: GHlocation_sentence-transformers_members.csv
Processed: GHlocation_Skywork_members.csv
Processed: GHlocation_mlc-ai_members.csv
Processed: GHlocation_deepset_members.csv
Processed: GHlocation_speechbrain_members.csv
Processed: GHlocation_vinai_members.csv
Processed: GHlocation_tasksource_members.csv
Processed: GHlocation_BAAI_members.csv
Processed: GHlocation_mtg-upf_members.csv
Processed: GHlocation_autogluon_members.csv
Processed: GHlocation_cross-encoder_members.csv
Processed: GHlocation_McGill-NLP_members.csv
Processed: GHlocation_Lajavaness_members.csv
Processed: GHlocation_CofeAI_members.csv
Processed: GHlocation_bigcode_members.csv
Processed: GHlocation_lmsys_members.csv
Processed: GHlocation_NeuML_members.csv
Processed: GHlocation_stablediffusionapi_members.csv
Processed: GHlocation_audeering_members.csv
Processed: GHlocation_reazon-research_members.csv
Processed: GHlocation_albert_members.csv
Processed: GHlocation_allenai_members.csv
Processed: GHlocation_InstantX_m