#### Generate files

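Logic: I generated 5 files (`case_1.xlsx` to `case_5.xlsx`) in the `cases` folder, each with `level`, `ref_team`, and `group` sheets.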

In [8]:
import pandas as pd
import random
import os
import math

# Create the 'cases' output folder. exist_ok=True makes this a no-op when the
# folder is already there and avoids the race of a separate exists() check.
os.makedirs("cases", exist_ok=True)

# Pool of NTU department and graduate-institute names (in Mandarin) that the
# generator functions below sample team names from.
departments = [
    "中國文學系", "外國語文學系", "歷史學系", "哲學系", "人類學系", "圖書資訊學系", "日本語文學系", "戲劇學系",
    "數學系", "物理學系", "化學系", "地質科學系", "大氣科學系", "地理環境資源學系", "心理學系",
    "經濟學系", "社會學系", "政治學系", "社會工作學系",
    "醫學系", "牙醫學系", "藥學系", "醫學檢驗暨生物技術學系", "護理學系", "物理治療學系", "職能治療學系",
    "土木工程學系", "機械工程學系", "化學工程學系", "工程科學及海洋工程學系", "材料科學與工程學系",
    "電機工程學系", "資訊工程學系",
    "農藝學系", "生物環境系統工程學系", "農業化學系", "森林環境暨資源學系", "動物科學技術學系",
    "農業經濟學系", "園藝學系", "獸醫學系", "生物產業傳播暨發展學系", "昆蟲學系",
    "工商管理學系", "會計學系", "財務金融學系", "國際企業學系", "資訊管理學系",
    "公共衛生學系",
    "法律學系",
    "生命科學系", "生化科技學系",
    "應用數學科學研究所", "統計與數據科學研究所", "海洋研究所", "天文物理研究所",
    "生物化學及分子生物學系", "微生物學系", "藥理學系", "植物科學研究所", "建築與城鄉研究所"
]

# Referee names; generate_ref_team_data() pairs each of them with one sampled
# department in the 'ref_team' sheet.
referees = [
    "小馬", "恩臨", "羿君", "茵茵", "芳芳", "阿冠", "小魚", "yoyo", "家葳", "絲瓜",
    "大餅", "手槍", "阿程", "阿侑", "阿宛"
]

# Function to generate data for the 'level' sheet (for files 1-4)
def generate_level_data(num_rows, level_distribution=None):
    """Build a shuffled 'level' sheet of ``num_rows`` teams.

    Args:
        num_rows: Number of teams (rows) to generate; must be >= 0.
        level_distribution: Optional sequence of per-level team counts, where
            position 0 is level 1, position 1 is level 2, and so on. Its sum
            must equal ``num_rows``. When omitted, teams are spread evenly
            over levels 1-4 and any remainder goes to the lowest levels.

    Returns:
        A DataFrame with columns ``team_name`` and ``level`` in random row
        order. Team names are unique whenever ``num_rows`` does not exceed
        ``len(departments)``; beyond that, names repeat as evenly as possible.

    Raises:
        ValueError: If ``num_rows`` is negative or ``level_distribution`` does
            not add up to ``num_rows``.
    """
    if num_rows < 0:
        raise ValueError(f"num_rows must be non-negative, got {num_rows}")

    if level_distribution is None:
        # Even split over levels 1-4; the remainder tops up levels 1, 2, 3 in turn.
        base_count, remainder = divmod(num_rows, 4)
        levels = [level for level in range(1, 5) for _ in range(base_count)]
        levels.extend(range(1, remainder + 1))
    else:
        levels = []
        for level, count in enumerate(level_distribution, 1):
            levels.extend([level] * count)
        if len(levels) != num_rows:
            # Fail early with a clear message instead of a pandas length error.
            raise ValueError(
                f"level_distribution sums to {len(levels)} teams, "
                f"expected num_rows={num_rows}"
            )

    # Draw whole shuffled passes over the department list plus a partial pass,
    # so a name is only repeated when there are more rows than departments.
    full_cycles, leftover = divmod(num_rows, len(departments))
    team_names = []
    for _ in range(full_cycles):
        team_names.extend(random.sample(departments, len(departments)))
    team_names.extend(random.sample(departments, leftover))

    df = pd.DataFrame({"team_name": team_names, "level": levels})
    # Shuffle rows so the levels are not grouped together in the sheet.
    return df.sample(frac=1).reset_index(drop=True)

# Function to generate data for the 'level' sheet for case_5 (40 teams, 3 departments with 2 teams)
def generate_level_data_case5():
    """Build the shuffled 40-team 'level' sheet used by case_5.

    Three departments (always including 土木工程學系) enter an "A" and a "B"
    team; 34 other departments enter one team each. Levels 1-4 end up with
    10 teams apiece.

    Returns:
        A DataFrame with columns ``team_name`` and ``level`` in random row order.
    """
    fixed_dept = "土木工程學系"
    other_depts = [d for d in departments if d != fixed_dept]
    double_depts = random.sample(other_depts, 2) + [fixed_dept]

    team_names, levels = [], []
    for dept in double_depts:
        # The A team gets a stronger level (1 or 2), the B team a weaker one (3 or 4).
        a_level = random.choice([1, 2])
        b_level = random.choice([3, 4])
        team_names += [f"{dept}A", f"{dept}B"]
        levels += [a_level, b_level]

    # 34 single-team departments, none of which already field an A/B pair.
    single_depts = random.sample([d for d in departments if d not in double_depts], 34)
    team_names += single_depts

    # Top every level up to 10 teams (40 teams / 4 levels), counting the
    # levels already taken by the A/B teams.
    target_per_level = 10
    taken = {level: levels.count(level) for level in range(1, 5)}
    levels += [
        level
        for level in range(1, 5)
        for _ in range(target_per_level - taken[level])
    ]

    frame = pd.DataFrame({"team_name": team_names, "level": levels})
    return frame.sample(frac=1).reset_index(drop=True)

# Function to generate data for the 'ref_team' sheet
def generate_ref_team_data():
    """Pair every referee with a randomly sampled department.

    Returns:
        A DataFrame with columns ``ref_name`` and ``team_name``, one row per
        entry of ``referees`` in list order.
    """
    # Repeat the department list often enough that the sample size never
    # exceeds the pool size.
    copies = len(referees) // len(departments) + 1
    pool = departments * copies
    picked = random.sample(pool, len(referees))
    return pd.DataFrame({"ref_name": referees, "team_name": picked})

# Helpers for the 'group' sheet
def _group_label(index):
    """Return a spreadsheet-style label for a 0-based index: A..Z, AA, AB, ..."""
    label = ""
    index += 1
    while index > 0:
        index, offset = divmod(index - 1, 26)
        label = chr(65 + offset) + label
    return label


def _pair_key(team):
    """Return the base department of an 'A'/'B'-suffixed team name, else None."""
    if isinstance(team, str) and team.endswith(("A", "B")):
        return team[:-1]
    return None


def _draw_team(pool, members, enforce_pairs):
    """Remove and return a random team from pool, avoiding A/B siblings of members if possible."""
    candidates = pool
    if enforce_pairs:
        taken = {_pair_key(name) for name, _ in members}
        taken.discard(None)
        compatible = [team for team in pool if _pair_key(team) not in taken]
        if compatible:
            # Only fall back to a conflicting team when nothing else is left.
            candidates = compatible
    team = random.choice(candidates)
    pool.remove(team)
    return team


def _build_group(pools, size, level_order, enforce_pairs):
    """Draw up to ``size`` (team, level) members, one per level first, then from the fullest pools."""
    members = []
    for level in level_order:
        if len(members) >= size:
            break
        if pools[level]:
            members.append((_draw_team(pools[level], members, enforce_pairs), level))
    # Top up from whichever level still has the most teams, so no team is
    # left unplaced when some levels run dry.
    while len(members) < size:
        level = max(pools, key=lambda lv: len(pools[lv]), default=None)
        if level is None or not pools[level]:
            break
        members.append((_draw_team(pools[level], members, enforce_pairs), level))
    return members


def _has_pair_conflict(groups):
    """Return True if any group holds two teams of the same A/B department."""
    for members in groups:
        keys = [key for key in (_pair_key(name) for name, _ in members) if key is not None]
        if len(keys) != len(set(keys)):
            return True
    return False


# Function to generate data for the 'group' sheet
def generate_group_data(level_df, case_number=1):
    """Randomly split the teams of a 'level' sheet into groups of four and three.

    4n teams give n groups of four; 4n+1 give n-2 fours and three threes;
    4n+2 give n-1 fours and two threes; 4n+3 give n fours and one three.
    Team counts that cannot be split that way (1, 2 or 5) fall back to groups
    of at most three. Four-team groups take one team per level in ascending
    level order; three-team groups take from the levels with the most teams
    left. If a level runs out, the group is topped up from the fullest
    remaining levels, so every team is placed exactly once.

    Args:
        level_df: DataFrame with ``team_name`` and ``level`` columns.
        case_number: When 5, team names ending in 'A' or 'B' that share the
            same prefix are treated as siblings and kept in different groups.
            The random assignment is retried a bounded number of times; the
            last attempt is returned even if a conflict remains.

    Returns:
        A DataFrame with columns ``group``, ``team_name`` and ``level``.
        Groups are labelled A, B, C, ... (continuing with AA, AB, ... past Z).
    """
    num_teams = len(level_df)

    # Number of three-team groups needed so the rest divides into fours.
    num_three_team_groups = (-num_teams) % 4
    num_four_team_groups = (num_teams - 3 * num_three_team_groups) // 4
    if num_four_team_groups < 0:
        # Too few teams for the 3/4 split: use groups of at most three.
        num_four_team_groups = 0
        num_three_team_groups = (num_teams + 2) // 3
    group_sizes = [4] * num_four_team_groups + [3] * num_three_team_groups

    # Teams per level, in sheet order; the level travels with the team so
    # the output does not depend on team names being unique.
    teams_by_level = {}
    for team, level in zip(level_df["team_name"], level_df["level"]):
        teams_by_level.setdefault(level, []).append(team)
    levels = sorted(teams_by_level)

    enforce_pairs = case_number == 5
    # The greedy random draw can paint itself into a corner near the end, so
    # retry a bounded number of times when siblings must be kept apart.
    max_attempts = 100 if enforce_pairs else 1

    groups = []
    for _ in range(max_attempts):
        pools = {level: list(teams_by_level[level]) for level in levels}
        groups = []
        for index, size in enumerate(group_sizes):
            if index < num_four_team_groups:
                level_order = levels
            else:
                # Fullest levels first, to maximize level diversity.
                level_order = sorted(levels, key=lambda lv: len(pools[lv]), reverse=True)
            groups.append(_build_group(pools, size, level_order, enforce_pairs))
        if not enforce_pairs or not _has_pair_conflict(groups):
            break

    group_data = [
        {"group": _group_label(index), "team_name": team, "level": level}
        for index, members in enumerate(groups)
        for team, level in members
    ]
    return pd.DataFrame(group_data, columns=["group", "team_name", "level"])

# Generate and save the five files
def _save_case(path, level_df, ref_df, group_df):
    """Write the three sheets of one test case to an .xlsx workbook at ``path``."""
    sheets = (("level", level_df), ("ref_team", ref_df), ("group", group_df))
    with pd.ExcelWriter(path, engine="openpyxl") as writer:
        for sheet_name, frame in sheets:
            frame.to_excel(writer, sheet_name=sheet_name, index=False)


# File 1: 16 rows, 4 per level, with ref_team and group sheets
data1_level = generate_level_data(16, [4, 4, 4, 4])
data1_ref = generate_ref_team_data()
data1_group = generate_group_data(data1_level, case_number=1)
_save_case("cases/case_1.xlsx", data1_level, data1_ref, data1_group)

# File 2: 38 rows, even distribution, with ref_team and group sheets
data2_level = generate_level_data(38)
data2_ref = generate_ref_team_data()
data2_group = generate_group_data(data2_level, case_number=2)
_save_case("cases/case_2.xlsx", data2_level, data2_ref, data2_group)

# File 3: 25 rows, even distribution, with ref_team and group sheets
data3_level = generate_level_data(25)
data3_ref = generate_ref_team_data()
data3_group = generate_group_data(data3_level, case_number=3)
_save_case("cases/case_3.xlsx", data3_level, data3_ref, data3_group)

# File 4: 31 rows, even distribution, with ref_team and group sheets
data4_level = generate_level_data(31)
data4_ref = generate_ref_team_data()
data4_group = generate_group_data(data4_level, case_number=4)
_save_case("cases/case_4.xlsx", data4_level, data4_ref, data4_group)

# File 5: 40 rows, 3 departments with 2 teams, with ref_team and group sheets
data5_level = generate_level_data_case5()
data5_ref = generate_ref_team_data()
data5_group = generate_group_data(data5_level, case_number=5)
_save_case("cases/case_5.xlsx", data5_level, data5_ref, data5_group)

# Summary of what was written
for summary_line in (
    "Generated 5 XLSX files in the 'cases' folder:",
    "- case_1.xlsx to case_5.xlsx: 'level', 'ref_team', and 'group' sheets",
    "- case_5.xlsx: 'level' sheet with 40 teams (3 departments with A/B teams, A/B not in same group)",
    "- 'group' sheet: includes 'group', 'team_name', and 'level' columns",
):
    print(summary_line)

Generated 5 XLSX files in the 'cases' folder:
- case_1.xlsx to case_5.xlsx: 'level', 'ref_team', and 'group' sheets
- case_5.xlsx: 'level' sheet with 40 teams (3 departments with A/B teams, A/B not in same group)
- 'group' sheet: includes 'group', 'team_name', and 'level' columns
