In [3]:
import pandas as pd
import numpy as np
import random

# Thiết lập seed để kết quả đồng nhất
np.random.seed(42)

# 1. Khởi tạo danh sách các dự án tiêu biểu của Turner & Townsend
project_names = [
    "Global Tech HQ - Fit Out", 
    "Solar Farm Infrastructure", 
    "Metro Line Expansion - Phase 2", 
    "Luxury Residential Tower", 
    "Green Energy Plant", 
    "City Hospital Redevelopment",
    "Data Center Construction",
    "Smart City Integrated Hub",
    "Airport Terminal Extension",
    "Public Library Refurbishment"
]

sectors = ["Real Estate", "Infrastructure", "Energy", "Real Estate", "Energy", 
           "Infrastructure", "Real Estate", "Digital Solutions", "Infrastructure", "Real Estate"]

pm_names = ["John Smith", "Maria Garcia", "David Chen", "Sarah Jenkins", "Michael Wong", 
            "Emma Taylor", "Robert Brown", "Lisa Anderson", "Kevin Lee", "Rachel Adams"]

# 2. Tạo dữ liệu Project Master (Quản lý dự án & Tài chính)
n_projects = len(project_names)

project_data = {
    'Project_ID': [f"TT-PRJ-{1000+i}" for i in range(n_projects)],
    'Project_Name': project_names,
    'Sector': sectors,
    'Project_Manager': pm_names,
    'Stage': [random.choice(['Pre-construction', 'Construction', 'Close-out']) for _ in range(n_projects)],
    'Planned_Budget': [round(random.uniform(500000, 5000000), -3) for _ in range(n_projects)],
}

df_projects = pd.DataFrame(project_data)

# Giả lập Actual Cost có biến động (vượt hoặc dưới ngân sách)
df_projects['Actual_Cost'] = df_projects['Planned_Budget'].apply(lambda x: round(x * random.uniform(0.85, 1.15), -2))

# Tính toán các chỉ số tài chính (Financial Calculations theo JD)
df_projects['Budget_Variance'] = df_projects['Actual_Cost'] - df_projects['Planned_Budget']
df_projects['Budget_Utilization_Rate'] = round((df_projects['Actual_Cost'] / df_projects['Planned_Budget']) * 100, 2)

# 3. Tạo dữ liệu Document Control (Quản lý hồ sơ theo JD)
doc_types = ["RFI", "Invoice", "Payment Application", "Safety Report", "Submittal", "Contract Ammendment"]
doc_statuses = ["Approved", "Pending", "Under Review", "Rejected"]

doc_records = []
for p_id in df_projects['Project_ID']:
    # Mỗi dự án có từ 40 đến 60 hồ sơ
    for _ in range(random.randint(40, 60)):
        doc_records.append({
            'Project_ID': p_id,
            'Document_ID': f"DOC-{random.randint(10000, 99999)}",
            'Document_Type': random.choice(doc_types),
            'Status': random.choices(doc_statuses, weights=[0.7, 0.1, 0.15, 0.05])[0], # Tỷ lệ Approved cao hơn
            'Submission_Date': pd.to_datetime('2025-01-01') + pd.to_timedelta(random.randint(0, 365), unit='D')
        })

df_documents = pd.DataFrame(doc_records)

# 4. Hiển thị kết quả kiểm tra
print("--- Project Financial Data (Preview) ---")
print(df_projects[['Project_Name', 'Planned_Budget', 'Actual_Cost', 'Budget_Utilization_Rate']].head())

print("\n--- Document Control Data (Preview) ---")
print(df_documents[['Project_ID', 'Document_Type', 'Status']].head())

# Xuất file CSV để dùng cho Dashboard
df_projects.to_csv('tt_project_master.csv', index=False)
df_documents.to_csv('tt_document_logs.csv', index=False)

--- Project Financial Data (Preview) ---
                     Project_Name  Planned_Budget  Actual_Cost  \
0        Global Tech HQ - Fit Out       3857000.0    3866700.0   
1       Solar Farm Infrastructure       2317000.0    2107600.0   
2  Metro Line Expansion - Phase 2       3739000.0    3925600.0   
3        Luxury Residential Tower       3260000.0    3180000.0   
4              Green Energy Plant       4595000.0    4722800.0   

   Budget_Utilization_Rate  
0                   100.25  
1                    90.96  
2                   104.99  
3                    97.55  
4                   102.78  

--- Document Control Data (Preview) ---
    Project_ID        Document_Type        Status
0  TT-PRJ-1000                  RFI  Under Review
1  TT-PRJ-1000  Payment Application      Approved
2  TT-PRJ-1000  Contract Ammendment      Approved
3  TT-PRJ-1000            Submittal       Pending
4  TT-PRJ-1000  Payment Application  Under Review
