In [None]:
import pandas as pd

# Combine each researcher's per-source article exports (WoS, Garuda,
# Google Scholar, Scopus) into a single Excel workbook per researcher.

# Load the CSV file with researcher information.
# SINTA_ID is read as text so IDs keep any leading zeros and are never
# rendered as floats (e.g. "123.0") when the column contains blank cells.
researcher_data_path = "XLSX/authors_data_teknikindustri.csv"  # Update with the actual path
researcher_data = pd.read_csv(researcher_data_path, dtype={"SINTA_ID": str})

# File-name prefixes of the per-source exports. The capitalized prefix is the
# value written to the "Source" column (e.g. "wos" -> "Wos").
SOURCE_PREFIXES = ("wos", "garuda", "googlescholar", "scopus")

# Spaces (as before) plus path separators and characters rejected in file
# names on common platforms are all replaced with "_" in output file names.
UNSAFE_FILENAME_CHARS = str.maketrans({ch: "_" for ch in ' /\\:*?"<>|'})

# Output paths already written during this run, so that two researchers who
# share a name do not silently overwrite each other's workbook.
written_paths = set()

# Iterate over each researcher based on SINTA_ID
for _, row in researcher_data.iterrows():
    researcher_id = str(row['SINTA_ID']).strip()
    author_name = row['Author_Name']
    university = row['University']
    department = row['Department']

    # A blank name arrives as NaN (a float); use a fallback label for the
    # file name and log messages instead of failing on a string method.
    if pd.isna(author_name):
        author_label = f"unknown_{researcher_id}"
    else:
        author_label = str(author_name)

    # Collect one DataFrame per source that could be loaded.
    frames = []
    for prefix in SOURCE_PREFIXES:
        file_path = f"XLSX/{prefix}_{researcher_id}_articles.xlsx"

        # Only the load can reasonably fail; a missing or unreadable source
        # is reported and skipped so the remaining sources are still merged.
        try:
            df = pd.read_excel(file_path)
        except FileNotFoundError:
            print(f"File {file_path} not found for researcher {researcher_id}. Skipping this source.")
            continue
        except Exception as e:
            print(f"An error occurred with {file_path} for researcher {researcher_id}: {e}")
            continue

        # Add "Source", "Author_Name", "University", and "Department" columns
        df['Source'] = prefix.capitalize()
        df['Author_Name'] = author_name
        df['University'] = university
        df['Department'] = department
        frames.append(df)

    # Concatenate once rather than growing a DataFrame inside the loop.
    if frames:
        combined_df = pd.concat(frames, ignore_index=True)
    else:
        # pd.concat rejects an empty list; an empty workbook is still written
        # so every researcher has an output file, as before.
        print(f"No source files could be loaded for researcher {researcher_id}; writing an empty workbook.")
        combined_df = pd.DataFrame()

    # Save the combined data to a new Excel file for each researcher using their name in the file name
    safe_author_name = author_label.translate(UNSAFE_FILENAME_CHARS)
    output_path = f"XLSX/combined_articles_with_source_{safe_author_name}.xlsx"
    if output_path in written_paths:
        # Same file name as an earlier researcher: disambiguate with the ID.
        safe_id = researcher_id.translate(UNSAFE_FILENAME_CHARS)
        output_path = f"XLSX/combined_articles_with_source_{safe_author_name}_{safe_id}.xlsx"
        print(f"Duplicate author name {author_label}; using {output_path} to avoid overwriting.")
    written_paths.add(output_path)

    combined_df.to_excel(output_path, index=False)
    print(f"Combined file for {author_label} (SINTA_ID {researcher_id}) saved at: {output_path}")
