# 0. 라이브러리

In [0]:
# import sys
# !{sys.executable} -m pip install seaborn statsmodels

In [0]:
import pandas as pd
import numpy as np
import random
import string
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
import datetime
import pytz

# 1. 데이터 불러오기

In [0]:
# Load the reference tables that drive the synthetic-data generation below.
df_jk = pd.read_csv("jobkorea_data.csv")                # source for age, gender, education (and salary-band) totals
job_df = pd.read_csv("job_skill_ratio.csv")             # source for job and skills
career_df = pd.read_csv('career_ratio.csv')             # source for career
df_region = pd.read_csv('region_plus_closet_v3.csv')    # source for location and work_location
df_salary = pd.read_csv("job_exp_salary_v3.csv")        # source for salary

# 2. 데이터 생성

## 2.1 age, career

In [0]:
# -----------------------------
# 2. Overall share of each age band
# -----------------------------
# Raw shares are given in percent; they are rescaled below so they sum to 1.
age_ratio_dict = dict(zip(
    ["25세이하", "26세~30세", "31세~35세", "36세~40세", "41세~45세", "46세이상"],
    [18.72, 34.61, 17.88, 7.18, 8.19, 13.42],
))
age_ratio_series = pd.Series(age_ratio_dict, dtype=float)
age_ratio_series = age_ratio_series / age_ratio_series.sum()

# -----------------------------
# 3. Career labels in ascending order (numeric conversion helper follows)
# -----------------------------
career_order = ["신입"] + [f"{years}년" for years in range(1, 20)] + ["20년 이상"]

def exp_to_num(exp):
    """Convert a career label into a number of years.

    "신입" maps to 0 and "20년 이상" maps to 30. Any other label has every
    "년" removed and the remainder is parsed with ``int`` (so a label that
    is not numeric raises ``ValueError``).
    """
    if exp == "신입":
        return 0
    if exp == "20년 이상":
        return 30
    years_text = exp.replace("년", "")
    return int(years_text)

# Numeric year value of every career label, in career_order order.
career_num = list(map(exp_to_num, career_order))

# -----------------------------
# 4. Load the career shares
# -----------------------------
# Rescale the '비율' column so it sums to 1, then align it to career_order;
# labels that do not appear in the file get a share of 0.
ratio_total = career_df['비율'].sum()
career_df['비율'] = career_df['비율'] / ratio_total
career_share_by_label = career_df.set_index('경력')['비율']
career_target_ratio = career_share_by_label.reindex(career_order).fillna(0)

# -----------------------------
# 5. Largest career value (in years) allowed for each age band
# -----------------------------
max_exp_by_age = dict([
    ("25세이하", 3),
    ("26세~30세", 7),
    ("31세~35세", 12),
    ("36세~40세", 17),
    ("41세~45세", 23),
    ("46세이상", 30),
])

# -----------------------------
# 6. Initial career distribution per age band (Gaussian bump)
# -----------------------------
age_career_matrix = pd.DataFrame(0.0, index=age_ratio_series.index, columns=career_order, dtype=float)

for age in age_ratio_series.index:
    max_exp = max_exp_by_age[age]
    # The bump is centred two years below the band's maximum career and its
    # width is a quarter of that maximum.
    center, sigma = max_exp - 2, max_exp / 4
    for label, years in zip(career_order, career_num):
        if years > max_exp:
            continue  # careers longer than the band allows keep weight 0
        z = (years - center) / sigma
        age_career_matrix.loc[age, label] = np.exp(-0.5 * z**2)
    # Normalise the row to sum to 1; fall back to uniform if it is all zero.
    row_total = age_career_matrix.loc[age].sum()
    if row_total > 0:
        age_career_matrix.loc[age] /= row_total
    else:
        age_career_matrix.loc[age] = 1.0 / len(career_order)

# -----------------------------
# 7. Hand-tuned per-age-band weights
# -----------------------------
# One weight per career label (same order as career_order) for each age band.
band_weights = {
    "25세이하": [1.5, 1.3, 1.2, 1.1, 1.0, 0.9, 0.8, 0.7, 0.6, 0.5,
               0.4, 0.3, 0.2, 0.15, 0.1, 0.1, 0.05, 0.05, 0.03, 0.02, 0.01],
    "26세~30세": [1.2, 1.1, 1.05, 1.0, 0.95, 0.9, 0.85, 0.8, 0.7, 0.6,
                0.5, 0.4, 0.3, 0.2, 0.15, 0.1, 0.05, 0.05, 0.03, 0.02, 0.01],
    "31세~35세": [0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.1, 1.0, 0.9,
                0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1, 0.05, 0.03, 0.01],
    "36세~40세": [0.01, 0.05, 0.1, 0.3, 0.5, 0.7, 1.0, 1.1, 1.2, 1.3,
                1.2, 1.1, 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2],
    "41세~45세": [0.0, 0.02, 0.05, 0.1, 0.3, 0.5, 0.8, 1.0, 1.2, 1.3,
                1.4, 1.3, 1.2, 1.1, 1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4],
    "46세이상": [0.0, 0.01, 0.02, 0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0,
               1.2, 1.3, 1.4, 1.3, 1.2, 1.1, 1.0, 0.9, 0.8, 0.7, 0.6],
}

weight_matrix = pd.DataFrame(0.0, index=age_ratio_series.index, columns=career_order, dtype=float)
for band, weights in band_weights.items():
    weight_matrix.loc[band] = weights

# Apply the weights element-wise, then renormalise every row to sum to 1.
age_career_matrix = age_career_matrix * weight_matrix
row_totals = age_career_matrix.sum(axis=1)
age_career_matrix = age_career_matrix.div(row_totals, axis=0)

# -----------------------------
# 8. Iteratively match the overall career shares
# -----------------------------
# Each of the 50 passes compares the age-weighted career mix with
# career_target_ratio, rescales every column by the ratio of the two, and
# renormalises each age-band row so it remains a probability distribution.
for _ in range(50):
    current_career_ratio = (age_career_matrix.T * age_ratio_series).T.sum()
    correction = career_target_ratio / current_career_ratio
    # Division by zero yields inf/NaN; such columns are left unscaled.
    correction = correction.replace([np.inf, -np.inf], 1).fillna(1)
    for age in age_career_matrix.index:
        scaled_row = age_career_matrix.loc[age] * correction
        age_career_matrix.loc[age] = scaled_row
        age_career_matrix.loc[age] = scaled_row / age_career_matrix.loc[age].sum()

# -----------------------------
# 9. Sample an age band for every row
# -----------------------------
#####################
### Number of dummy rows to generate
#####################
N = 10000 ###########
#####################
#####################

age_samples = np.random.choice(
    age_ratio_series.index,
    size=N,
    p=age_ratio_series.values.astype(float),
)

# -----------------------------
# 10. Sample a career label conditioned on each row's age band
# -----------------------------
# The probability vector of each band is extracted once and reused.
career_probs_by_band = {
    band: age_career_matrix.loc[band].values.astype(float)
    for band in age_career_matrix.index
}
exp_samples = [
    np.random.choice(career_order, p=career_probs_by_band[band])
    for band in age_samples
]

# -----------------------------
# 11. Build the dummy table
# -----------------------------
df_dummy = pd.DataFrame({'age_category': age_samples, 'career': exp_samples})

In [0]:
# Inclusive (low, high) bounds from which a concrete age is drawn per band.
age_bounds = {
    '25세이하': (20, 25),
    '26세~30세': (26, 30),
    '31세~35세': (31, 35),
    '36세~40세': (36, 40),
    '41세~45세': (41, 45),
    '46세이상': (46, 51),
}
df_dummy['age'] = df_dummy['age_category'].map(
    lambda band: random.randint(*age_bounds[band])
)

## 2.2 job, skills_current

In [0]:
# -----------------------------
# 12. Add job and skills
# -----------------------------
# Draw a job for every row according to the normalised '비율' shares.
job_df["비율"] = job_df["비율"] / job_df["비율"].sum()
job_probs = job_df["비율"].values.astype(float)
job_roles = job_df["직무"].values
chosen_jobs = np.random.choice(job_roles, size=N, p=job_probs)
df_dummy["job"] = chosen_jobs

# Parsed skill list per job, filled lazily the first time a job is seen so
# job_df is not re-scanned and the skill string is not re-split for every row.
# As before, the first matching row of job_df is used for a given job.
skills_by_job = {}
skills_list = []
for job in chosen_jobs:
    if job not in skills_by_job:
        raw_skills = job_df.loc[job_df["직무"] == job, "스킬"].values[0]
        # Skills are stored as "{a;b;c}": drop the braces and split on ';'.
        skills_by_job[job] = raw_skills.strip("{}").split(";")
    available_skills = skills_by_job[job]
    # Pick 3 to 5 distinct skills, capped by how many the job offers.
    n_skills = random.randint(3, 5)
    chosen_skills = random.sample(available_skills, min(n_skills, len(available_skills)))
    skills_list.append(";".join(chosen_skills))
df_dummy["skills_current"] = skills_list

## 2.2.1 skfn_current (=skillfulness_current)

In [0]:
# Seniority bucket -> career labels that belong to it.
# Every value is a list so that `label in career_category[bucket]` is always
# an exact-match membership test (a bare string would turn it into a
# substring test).
career_category = {
    '신입': ['신입'],
    '주니어': ['1년', '2년', '3년'],
    '미드': ['4년', '5년', '6년'],
    '시니어': [f'{years}년' for years in range(7, 20)] + ['20년 이상'],
}

# Seniority bucket -> inclusive [low, high] range of the skillfulness score.
skfn_range = {
    '신입': [0, 10],
    '주니어': [11, 25],
    '미드': [26, 40],
    '시니어': [41, 55],
}

In [0]:
def get_career_category(career):
    """Return the seniority bucket name for a career label.

    '신입' is answered directly. Otherwise the first bucket of
    ``career_category`` whose value contains ``career`` is returned, and
    '시니어' is used when no bucket matches.
    """
    if career == '신입':
        return '신입'
    matching = (cat for cat, years in career_category.items() if career in years)
    return next(matching, '시니어')

def generate_skillfulness(row):
    """Generate one random skillfulness score per skill of a row.

    Args:
        row: a mapping-like row providing 'career' (a career label) and
            'skills_current' (skill names joined by ';').

    Returns:
        A ';'-joined string with as many integer scores as there are skills,
        each drawn uniformly from the inclusive range that ``skfn_range``
        assigns to the row's seniority bucket.
    """
    cat = get_career_category(row['career'])
    # Named low/high so the builtins min() and max() are not shadowed.
    low, high = skfn_range[cat]
    skill_count = len(row['skills_current'].split(';'))
    return ';'.join(str(random.randint(low, high)) for _ in range(skill_count))


df_dummy['skfn_current'] = df_dummy.apply(generate_skillfulness, axis=1)

In [0]:
# Preview the first rows after adding skills_current / skfn_current.
df_dummy.head()

Unnamed: 0,age_category,career,age,job,skills_current,skfn_current
0,41세~45세,2년,42,서버/백엔드 개발자,MVVM;Classic ASP;Microsoft Excel,16;19;18
1,31세~35세,신입,31,DBA,Amazon ElastiCache;SQL;Python;NGINX,2;10;2;6
2,26세~30세,1년,29,안드로이드 개발자,Mac OS X;Webpack;Dart;Lottie;React,11;18;15;15;22
3,26세~30세,2년,30,크로스플랫폼 앱개발자,React Native;Zustand;Spring Framework,22;23;11
4,31세~35세,3년,31,인공지능/머신러닝,PyTorch;C#;yolo;Windows;DVC,12;14;18;11;21


## 2.2.2 skills_past/p2~p6, skfn_past/p2~p6

In [0]:
# Placeholder columns for past skill sets and their skillfulness scores.
# All of them are created empty (None), in skills_*/skfn_* pairs.
for period in ('past', 'p2', 'p3', 'p4', 'p5', 'p6'):
    df_dummy[f'skills_{period}'] = None
    df_dummy[f'skfn_{period}'] = None

## 2.3 employee_ID

In [0]:
# -----------------------------
# 13. Generate job-seeker IDs
# -----------------------------
# An ID is one lowercase letter followed by an 8-digit, 1-based counter.
# The counter restarts at 1 and the letter advances after 99,999,999 IDs.
letters = list(string.ascii_lowercase)
ids_per_letter = 99999999
ids = [
    f"{letters[position // ids_per_letter]}{position % ids_per_letter + 1:08d}"
    for position in range(N)
]
df_dummy["employee_id"] = ids

## 2.4 gender, education

In [0]:
# Fit a joint age x gender x education table whose one-dimensional marginals
# match the column totals of df_jk, by repeatedly rescaling along each axis
# (the progress message below calls this IPF).

# Column lists used for the marginal totals
age_cols = ['25세이하', '26세~30세', '31세~35세', '36세~40세', '41세~45세', '46세이상']
gender_cols = ['남자', '여자']
edu_cols = ['고졸미만', '고졸(예정)', '초대졸(예정)', '대졸(예정)', '석박사(예정)']

# Sum every column to get the target marginals
age_total = [df_jk[col].sum() for col in age_cols]
gender_total = [df_jk[col].sum() for col in gender_cols]
edu_total = [df_jk[col].sum() for col in edu_cols]

age_total = np.array(age_total)
gender_total = np.array(gender_total)
edu_total = np.array(edu_total)

# NOTE(review): total_count is not used anywhere else in this cell.
total_count = df_jk['지원자수'].sum()

# Best result seen so far across the runs below
best_max_diff = float('inf')
best_iterations = 0
best_joint_array = None

# Iteration caps to try
iterations_list = [500, 1000, 3000, 10000, 20000]

# Run the fit once per iteration cap
# NOTE(review): every run restarts from the same all-ones array and the update
# contains no randomness, so runs appear to differ only in how many iterations
# they are allowed — confirm whether the repeated restarts are needed.
for max_iterations in iterations_list:
    # Re-initialise joint_array for every run: shape (age, gender, education)
    joint_array = np.ones((6, 2, 5))
    # tol = 1e-6  (stricter alternative, currently disabled)
    tol = 1

    for i in range(max_iterations):
        # a. Rescale so the age marginal matches age_total
        age_sum = joint_array.sum(axis=(1, 2))
        joint_array *= (age_total / age_sum)[:, np.newaxis, np.newaxis]

        # b. Rescale so the gender marginal matches gender_total
        gender_sum = joint_array.sum(axis=(0, 2))
        joint_array *= (gender_total / gender_sum)[np.newaxis, :, np.newaxis]

        # c. Rescale so the education marginal matches edu_total
        edu_sum = joint_array.sum(axis=(0, 1))
        joint_array *= (edu_total / edu_sum)[np.newaxis, np.newaxis, :]

        # Convergence check: summed absolute deviation of each marginal
        age_diff = np.abs(joint_array.sum(axis=(1,2)) - age_total).sum()
        gender_diff = np.abs(joint_array.sum(axis=(0,2)) - gender_total).sum()
        edu_diff = np.abs(joint_array.sum(axis=(0,1)) - edu_total).sum()

        # Stop this run once all three deviations are below tol
        if age_diff < tol and gender_diff < tol and edu_diff < tol:
            print(f"IPF가 {i}번의 반복 후 수렴했습니다.")
            break

    # Largest marginal deviation of this run
    max_diff = max(age_diff, gender_diff, edu_diff)

    # Keep this run if it strictly improves on the best so far.
    # best_iterations records the iteration cap of that run, not the
    # iteration at which it stopped.
    if max_diff < best_max_diff:
        best_max_diff = max_diff
        best_iterations = max_iterations
        best_joint_array = joint_array.copy()

# Convert the best table into a probability distribution
total_sum = best_joint_array.sum()
joint_prob = best_joint_array / total_sum

# Flatten the distribution into a 1-D array
flat_prob = joint_prob.flatten()

In [0]:
# Every (age band, gender, education) combination, with age varying slowest
# and education fastest.
all_combinations = [
    (age, gender, edu)
    for age in age_cols
    for gender in gender_cols
    for edu in edu_cols
]

In [0]:
# Conditional distribution of (gender, education) within each age band:
# every age slice of the fitted table is divided by its own total.
age_slice_totals = best_joint_array.sum(axis=(1, 2), keepdims=True)
conditional_probs = best_joint_array / age_slice_totals


def sample_gender_edu(age_value):
    """Draw one (gender, education) pair for the given age band.

    The band's slice of ``conditional_probs`` is flattened, one cell is drawn
    with those probabilities, and the flat cell index is converted back into
    a gender index and an education index.
    """
    cell_probs = conditional_probs[age_cols.index(age_value)].flatten()
    cell = np.random.choice(len(cell_probs), p=cell_probs)
    n_edu = len(edu_cols)
    return gender_cols[cell // n_edu], edu_cols[cell % n_edu]


# Fill gender and education in df_dummy from each row's age band.
sampled_pairs = df_dummy['age_category'].map(sample_gender_edu)
df_dummy['gender'], df_dummy['education'] = zip(*sampled_pairs)

df_dummy

Unnamed: 0,age_category,career,age,job,skills_current,skfn_current,skills_past,skfn_past,skills_pp,skfn_pp,employee_id,gender,education
0,25세이하,신입,21,SW/솔루션,Adobe Target;FW;MSA,5;1;4,,,,,a00000001,남자,대졸(예정)
1,36세~40세,11년,37,SW/솔루션,React Router;Ccnp;NestJS,46;46;43,,,,,a00000002,남자,대졸(예정)
2,25세이하,신입,22,인공지능/머신러닝,Next.js;CrewAI;GraphicsMagick,9;10;7,,,,,a00000003,남자,대졸(예정)
3,31세~35세,8년,33,QA 엔지니어,Android OS;Redmine;C,55;46;49,,,,,a00000004,남자,대졸(예정)
4,26세~30세,1년,28,DevOps/시스템 엔지니어,K8S;Vue.js;Looker,19;19;11,,,,,a00000005,남자,대졸(예정)
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,26세~30세,3년,27,DevOps/시스템 엔지니어,Amazon EKS;Apache Tomcat;ASP.NET;Kotlin;Window...,18;19;12;16;12,,,,,a00009996,여자,대졸(예정)
9996,46세이상,20년 이상,47,정보보안 담당자,QueryDSL;vmware;Windows Terminal;Insight,51;45;49;55,,,,,a00009997,여자,대졸(예정)
9997,26세~30세,2년,27,정보보안 담당자,Solidity;CloudFlare;L3;React Query,16;23;20;25,,,,,a00009998,남자,대졸(예정)
9998,26세~30세,1년,26,프론트엔드 개발자,Kafka;Backendless;RxJS;PostgreSQL,23;18;12;14,,,,,a00009999,여자,대졸(예정)


## 2.5 location, work_location

In [0]:
# Sampling probabilities proportional to the '경제활동인구 (천명)' column.
pop_weights = np.array(df_region['경제활동인구 (천명)'])
weight_total = pop_weights.sum()
proba = pop_weights / weight_total

# Assign a region name to every row according to those probabilities.
region_names = df_region['행정구역명']
df_dummy['location'] = np.random.choice(region_names, size=len(df_dummy), p=proba)

In [0]:
# Split the ';'-joined neighbour columns into lists, stored as '<column>_list'.
for neighbour_col in ('closest_sidos', 'closest_sigungu'):
    df_region[f'{neighbour_col}_list'] = df_region[neighbour_col].str.split(';')

# Lookup table: region name -> that region's row as a dict.
# Only the first row is kept when a region name occurs more than once.
df_region_unique = df_region.drop_duplicates(subset=['행정구역명'], keep='first')
region_dict = df_region_unique.set_index('행정구역명').to_dict(orient='index')

# Shapes a generated work_location can take; one is picked at random per row.
conditions = [
    '시/도 전체 1개',
    '시/도 전체 2개',
    '시군구 2개',
    '시/도 전체 1개 + 시군구 2개',
    '시군구 3개',
]

def get_work_location(location, region_lookup=None, condition_pool=None):
    """Build a random, comma-separated list of preferred work locations.

    One condition is drawn at random from ``condition_pool`` and decides the
    shape of the result (the home province only, the home province plus a
    neighbouring province, the home district plus neighbouring districts, or
    a neighbouring province plus districts).

    Args:
        location: the row's home region name, used as the key into
            ``region_lookup``.
        region_lookup: mapping of region name to a dict with the keys
            '시도명', 'closest_sidos_list' and 'closest_sigungu_list'.
            Defaults to the module-level ``region_dict``.
        condition_pool: sequence of condition labels to draw from.
            Defaults to the module-level ``conditions``.

    Returns:
        The chosen names joined by ', '. Names that are missing (None/NaN)
        are skipped; if nothing usable remains, the home province is
        returned, or ``location`` itself when the province is unknown.
    """
    if region_lookup is None:
        region_lookup = region_dict
    if condition_pool is None:
        condition_pool = conditions

    region_info = region_lookup.get(location, {})
    current_sido = region_info.get('시도명')

    def as_list(value):
        # A missing CSV cell reaches here as NaN (a float), not as a list.
        return value if isinstance(value, list) else []

    sido_list = as_list(region_info.get('closest_sidos_list'))
    sigungu_list = as_list(region_info.get('closest_sigungu_list'))

    def pick(pool, count):
        # Sample without replacement, capped by what the pool can offer.
        return random.sample(pool, min(count, len(pool)))

    # Builders are evaluated lazily so random draws happen only for the
    # chosen condition, in the same order as before.
    builders = {
        '시/도 전체 1개': lambda: [current_sido],
        '시/도 전체 2개': lambda: [current_sido] + pick(sido_list, 1),
        '시군구 2개': lambda: [location] + pick(sigungu_list, 1),
        '시/도 전체 1개 + 시군구 2개': lambda: pick(sido_list, 1) + [location] + pick(sigungu_list, 1),
        '시군구 3개': lambda: [location] + pick(sigungu_list, 2),
    }

    chosen_condition = random.choice(condition_pool)
    builder = builders.get(chosen_condition)
    location_candidates = builder() if builder is not None else []

    # Drop unknown names (None/NaN) so the join below cannot raise.
    location_candidates = [name for name in location_candidates if isinstance(name, str)]

    # Fallback when nothing usable was produced.
    if not location_candidates:
        location_candidates = [
            name for name in (current_sido, location) if isinstance(name, str)
        ][:1]

    return ', '.join(location_candidates)


# Add the 'work_location' column to df_dummy, derived from each row's location
df_dummy['work_location'] = df_dummy['location'].apply(get_work_location)

## 2.6 employee_name

In [0]:
N = len(df_dummy)

# Pool of family names; some appear more than once, so they are drawn more
# often by random.choice.
surnames = (
    "김 이 박 최 정 강 조 윤 장 임 한 오 서 신 권 황 안 송 전 "
    "홍 유 고 문 양 손 배 조 백 허 유 남 심 노 정 하 곽 성 "
    "차 주 우 구 신 임 전 민 유 류 나"
).split()

# Anonymised name: a random family name followed by the literal "XX".
employee_names = [f"{random.choice(surnames)}XX" for _ in range(N)]
df_dummy["employee_name"] = employee_names

## 2.7 hope_salary

In [0]:
# -----------------------------
# 15. Assign the desired salary (based on job_exp_salary_v3)
# -----------------------------

# Career label -> column name in df_salary ("신입" uses the "0년차" column and
# "20년 이상" uses the "20년차" column).
career_to_col = {
    "신입": "0년차",
    **{f"{years}년": f"{years}년차" for years in range(1, 20)},
    "20년 이상": "20년차",
}

# Salary bands and their overall shares, taken from the column sums of df_jk.
salary_cols = ["1800미만", "1800~2200", "2200~2600", "2600~3000",
               "3000~3400", "3400~4000", "4000이상", "면접후결정"]

salary_dist = df_jk[salary_cols].sum()
salary_ratio = salary_dist / salary_dist.sum()

# Number of rows in df_dummy
N = df_dummy.shape[0]

# Remaining quota per salary band, proportional to its share of N rows.
salary_counter = {col: int(round(salary_ratio[col] * N)) for col in salary_cols}

# Index df_salary by job once (first row wins for a duplicated job) instead of
# filtering the whole frame for every row.
salary_by_job = df_salary.drop_duplicates(subset='job', keep='first').set_index('job')

# Assign a band to every row, in row order.
hope_salary = []
for job, career in zip(df_dummy['job'], df_dummy['career']):
    career_col = career_to_col[career]

    # Look up the band for this job/career; an unknown job or a missing
    # career column falls back to "면접후결정". Only the lookup failure is
    # caught so unrelated errors are no longer hidden.
    try:
        val = salary_by_job.at[job, career_col]
    except KeyError:
        val = "면접후결정"

    # Use the band only while it still has quota left; otherwise fall back.
    if val in salary_counter and salary_counter[val] > 0:
        hope_salary.append(val)
        salary_counter[val] -= 1
    else:
        hope_salary.append("면접후결정")

# Store the result in df_dummy.
df_dummy['hope_salary'] = hope_salary

## 2.8 timestamp

In [0]:
# Stamp every row with the same creation time, expressed in Asia/Seoul time.
seoul_tz = pytz.timezone('Asia/Seoul')
current_timestamp = datetime.datetime.now(seoul_tz)
df_dummy['timestamp'] = current_timestamp

df_dummy.head()

Unnamed: 0,age_category,career,age,job,skills_current,skfn_current,skills_past,skfn_past,skills_pp,skfn_pp,employee_id,gender,education,location,work_location,employee_name,hope_salary,timestamp
0,25세이하,신입,21,SW/솔루션,Adobe Target;FW;MSA,5;1;4,,,,,a00000001,남자,대졸(예정),인천광역시 부평구,"인천광역시 부평구, 인천광역시 동구",백XX,3400~4000,2025-09-23 09:33:15.850945+09:00
1,36세~40세,11년,37,SW/솔루션,React Router;Ccnp;NestJS,46;46;43,,,,,a00000002,남자,대졸(예정),광주광역시 북구,"광주광역시, 전북특별자치도",황XX,4000이상,2025-09-23 09:33:15.850945+09:00
2,25세이하,신입,22,인공지능/머신러닝,Next.js;CrewAI;GraphicsMagick,9;10;7,,,,,a00000003,남자,대졸(예정),인천광역시 중구,"인천광역시 중구, 인천광역시 동구, 인천광역시 연수구",남XX,3400~4000,2025-09-23 09:33:15.850945+09:00
3,31세~35세,8년,33,QA 엔지니어,Android OS;Redmine;C,55;46;49,,,,,a00000004,남자,대졸(예정),인천광역시 부평구,"경기도, 인천광역시 부평구, 인천광역시 계양구",이XX,4000이상,2025-09-23 09:33:15.850945+09:00
4,26세~30세,1년,28,DevOps/시스템 엔지니어,K8S;Vue.js;Looker,19;19;11,,,,,a00000005,남자,대졸(예정),부산광역시 북구,"경상남도, 부산광역시 북구, 부산광역시 강서구",권XX,3400~4000,2025-09-23 09:33:15.850945+09:00


# 3. 컬럼 재정렬

In [0]:
# Final column order of the exported table.
column_order = [
    'employee_id', 'employee_name', 'age', 'age_category', 'gender',
    'location', 'education', 'career', 'job',
    'skills_current', 'skfn_current', 'work_location', 'hope_salary',
    'skills_past', 'skfn_past',
    'skills_p2', 'skfn_p2',
    'skills_p3', 'skfn_p3',
    'skills_p4', 'skfn_p4',
    'skills_p5', 'skfn_p5',
    'skills_p6', 'skfn_p6',
    'timestamp',
]
df_dummy = df_dummy[column_order]

df_dummy.head()

Unnamed: 0,employee_id,employee_name,age,age_category,gender,location,education,career,job,skills_current,skfn_current,work_location,hope_salary,skills_past,skfn_past,skills_p2,skfn_p2,skills_p3,skfn_p3,skills_p4,skfn_p4,skills_p5,skfn_p5,skills_p6,skfn_p6,timestamp
0,a00000001,백XX,21,25세이하,남자,인천광역시 부평구,대졸(예정),신입,SW/솔루션,Adobe Target;FW;MSA,5;1;4,"인천광역시 부평구, 인천광역시 동구",3400~4000,,,,,,,,,,,,,2025-09-23 09:33:15.850945+09:00
1,a00000002,황XX,37,36세~40세,남자,광주광역시 북구,대졸(예정),11년,SW/솔루션,React Router;Ccnp;NestJS,46;46;43,"광주광역시, 전북특별자치도",4000이상,,,,,,,,,,,,,2025-09-23 09:33:15.850945+09:00
2,a00000003,남XX,22,25세이하,남자,인천광역시 중구,대졸(예정),신입,인공지능/머신러닝,Next.js;CrewAI;GraphicsMagick,9;10;7,"인천광역시 중구, 인천광역시 동구, 인천광역시 연수구",3400~4000,,,,,,,,,,,,,2025-09-23 09:33:15.850945+09:00
3,a00000004,이XX,33,31세~35세,남자,인천광역시 부평구,대졸(예정),8년,QA 엔지니어,Android OS;Redmine;C,55;46;49,"경기도, 인천광역시 부평구, 인천광역시 계양구",4000이상,,,,,,,,,,,,,2025-09-23 09:33:15.850945+09:00
4,a00000005,권XX,28,26세~30세,남자,부산광역시 북구,대졸(예정),1년,DevOps/시스템 엔지니어,K8S;Vue.js;Looker,19;19;11,"경상남도, 부산광역시 북구, 부산광역시 강서구",3400~4000,,,,,,,,,,,,,2025-09-23 09:33:15.850945+09:00


# 4. 데이터 저장

In [0]:
# Write the final table to 'brz_employee_info.csv' without the index column.
df_dummy.to_csv('brz_employee_info.csv', index=False)