In [15]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer 

# Company master data, keyed by company name.  Each record repeats the
# company name, states its business domain, and lists the clients it serves
# (client name plus the client's country).
company_detail_list = {
    'Company 1': {
        'name': 'Company 1',
        'domain': 'Retail',
        'clients': [
            {'name': 'acme.inc', 'country': 'united states'},
            {'name': 'Wayne.co', 'country': 'united states'},
        ],
    },
    'Company 2': {
        'name': 'Company 2',
        'domain': 'Construction',
        'clients': [
            {'name': 'Tesla', 'country': 'united states'},
            {'name': 'Japan Airlines', 'country': 'japan'},
            {'name': 'Indofood', 'country': 'indonesia'},
        ],
    },
    'Company 3': {
        'name': 'Company 3',
        'domain': 'Healthcare',
        'clients': [
            {'name': 'Petronas', 'country': 'malaysia'},
            {'name': 'VW Group', 'country': 'germany'},
            {'name': 'IBM', 'country': 'united states'},
            {'name': 'Mitsubishi', 'country': 'japan'},
        ],
    },
}

# Name-only listing of the companies, one single-key record per company.
company_name_list = [
    {'name': 'Company 1'},
    {'name': 'Company 2'},
    {'name': 'Company 3'},
]

# Name-only listing of the employees, one single-key record per employee.
employee_name_list = [
    {'name': 'John Doe'},
    {'name': 'Tom Smith'},
    {'name': 'Andrew Sebastian'},
]

# Employee records keyed by full name.  Inside each record 'name' stores the
# employee ID code, and 'company' stores the name of the employing company.
employee_detail_list = {
    'John Doe': {
        'name': 'EMP-0001',
        'first_name': 'John',
        'last_name': 'Doe',
        'full_name': 'John Doe',
        'company': 'Company 1',
    },
    'Tom Smith': {
        'name': 'EMP-0002',
        'first_name': 'Tom',
        'last_name': 'Smith',
        'full_name': 'Tom Smith',
        'company': 'Company 2',
    },
    'Andrew Sebastian': {
        'name': 'EMP-0003',
        'first_name': 'Andrew',
        'last_name': 'Sebastian',
        'full_name': 'Andrew Sebastian',
        'company': 'Company 2',
    },
}


# Answer no. 1
def sort_company(company_detail_list):
    """Return every company's name and domain, ordered by domain descending.

    Args:
        company_detail_list: Mapping whose values are company records that
            contain at least the 'name' and 'domain' keys.

    Returns:
        A new list of ``{"name": ..., "domain": ...}`` dicts sorted by the
        'domain' string in reverse order.
    """
    summaries = []
    for record in company_detail_list.values():
        summaries.append({"name": record['name'], "domain": record['domain']})

    # The sort is stable, so companies sharing a domain keep their input order
    # even with reverse=True.
    summaries.sort(key=lambda item: item['domain'], reverse=True)
    return summaries

# Run answer 1 and print the label followed by the sorted company list.
sorted_companies = sort_company(company_detail_list)
print("jawaban 1", sorted_companies, sep="\n")

# Answer no. 2
def get_company_domain(company_detail_list):
    """Print one summary line per company: name, domain and client count.

    Args:
        company_detail_list: Mapping whose values are company records that
            contain the 'name', 'domain' and 'clients' keys.

    Returns:
        None. The report is written to standard output only.
    """
    for record in company_detail_list.values():
        label, sector = record['name'], record['domain']
        client_total = len(record['clients'])
        print(f"{label}: {sector}, relation: {client_total} clients")
        
# Run answer 2: print the label, then let get_company_domain write its own
# per-company report lines to standard output.
print("jawaban 2")
get_company_domain(company_detail_list)

# Answer no. 3
def get_employees(employee_detail_list, company_detail_list):
    """List each employee together with their company and its domain.

    Args:
        employee_detail_list: Mapping whose values are employee records that
            contain the 'full_name' and 'company' keys.
        company_detail_list: Mapping from company name to a company record
            that contains the 'domain' key.

    Returns:
        A list of ``{"full_name", "company", "domain"}`` dicts, one per
        employee, in the iteration order of ``employee_detail_list``.

    Raises:
        KeyError: If an employee's company is not a key of
            ``company_detail_list`` or a required field is missing.
    """
    return [
        {
            "full_name": person['full_name'],
            "company": person['company'],
            # The employee's company name doubles as the lookup key.
            "domain": company_detail_list[person['company']]['domain'],
        }
        for person in employee_detail_list.values()
    ]
# Run answer 3 and print the label followed by the employee/domain list.
employees = get_employees(employee_detail_list, company_detail_list)
print("jawaban 3", employees, sep="\n")


# Answer no. 4
def get_companies_with_employees(company_detail_list, employee_detail_list):
    """List every company together with the full names of its employees.

    Args:
        company_detail_list: Mapping whose values are company records that
            contain the 'name' key.
        employee_detail_list: Mapping whose values are employee records that
            contain the 'full_name' and 'company' keys.

    Returns:
        A list of ``{"company": name, "employees": [full_name, ...]}`` dicts,
        one per company in the iteration order of ``company_detail_list``.
        Companies without employees get an empty list; employees whose
        company is not listed are ignored.

    Raises:
        KeyError: If a record lacks one of the required keys.
    """
    # Group employee names by company in a single pass, instead of rescanning
    # every employee once per company.
    staff_by_company = {}
    for employee in employee_detail_list.values():
        staff_by_company.setdefault(employee['company'], []).append(
            employee['full_name']
        )

    return [
        {
            "company": company['name'],
            # Copy so each result entry owns its list, even when two company
            # records share the same name.
            "employees": list(staff_by_company.get(company['name'], [])),
        }
        for company in company_detail_list.values()
    ]

# Run answer 4 and print the label followed by the company/employee list.
companies_with_employees = get_companies_with_employees(company_detail_list, employee_detail_list)
print("jawaban 4", companies_with_employees, sep="\n")


print("------------------ Soal pre-processing data ----------------------")


# Load the startup dataset and take its raw values as one 2-D array holding
# both the numeric columns and the text column.
data_startup = pd.read_csv('50_Startups.csv')
X = data_startup.values

# Replace missing numeric entries (NaN) with the mean of their own column.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
# Positions of the numeric columns; index 3 is left out because it is the
# categorical column that is one-hot encoded further down.
numeric_columns = [0, 1, 2, 4]
# One fit_transform call learns a separate mean for each column, so all
# numeric columns are imputed together without refitting per column.
X[:, numeric_columns] = imputer.fit_transform(X[:, numeric_columns])
print("jawaban 1")
print(X)

print("------------------------------------------------------------------------------")
# One-hot encode the 'State' column (index 3).  All remaining columns are
# passed through unchanged.
column_transformer = ColumnTransformer(
    transformers=[('state', OneHotEncoder(), [3])],
    remainder='passthrough',
)

X_transformed = column_transformer.fit_transform(X)
print("jawaban 2", X_transformed, sep="\n")

print("-------------------------------------------------------------------------------")

# The passthrough columns sit to the right of the one-hot columns in their
# original order, so they are addressed relative to the last column.  This
# stays correct however many one-hot columns the encoder produced.
# NOTE(review): assumes the CSV column order is R&D Spend, Administration,
# Marketing Spend, State, Profit -- confirm against the file header.
n_columns = X_transformed.shape[1]
profit_col = n_columns - 1
marketing_spend_col = n_columns - 2
administration_col = n_columns - 3

# Tax is 5% of (Profit + Marketing Spend + Administration) for each row.
# astype(float) is applied because the columns are converted before summing.
tax = (X_transformed[:, profit_col].astype(float) +
       X_transformed[:, marketing_spend_col].astype(float) +
       X_transformed[:, administration_col].astype(float)) * 0.05

# Append Tax as a new right-most column of the dataset.
X_final = np.hstack([X_transformed, tax.reshape(-1, 1)])
print("jawaban 3")
print(X_final)

print("-------------------------------------------------------------------------------")

# Fit a StandardScaler on every column of X_final, then apply it to the same
# data to obtain the standardised matrix.
scaler = StandardScaler().fit(X_final)
X_scaled = scaler.transform(X_final)
print("jawaban 4", X_scaled, sep="\n")
print("-------------------------------------------------------------------------------")


jawaban 1
[{'name': 'Company 1', 'domain': 'Retail'}, {'name': 'Company 3', 'domain': 'Healthcare'}, {'name': 'Company 2', 'domain': 'Construction'}]
jawaban 2
Company 1: Retail, relation: 2 clients
Company 2: Construction, relation: 3 clients
Company 3: Healthcare, relation: 4 clients
jawaban 3
[{'full_name': 'John Doe', 'company': 'Company 1', 'domain': 'Retail'}, {'full_name': 'Tom Smith', 'company': 'Company 2', 'domain': 'Construction'}, {'full_name': 'Andrew Sebastian', 'company': 'Company 2', 'domain': 'Construction'}]
jawaban 4
[{'company': 'Company 1', 'employees': ['John Doe']}, {'company': 'Company 2', 'employees': ['Tom Smith', 'Andrew Sebastian']}, {'company': 'Company 3', 'employees': []}]
------------------ Soal pre-processing data ----------------------
jawaban 1
[[165349.2 136897.8 471784.1 'New York' 192261.83]
 [162597.7 151377.59 443898.53 'California' 191792.06]
 [153441.51 101145.55 407934.54 'Florida' 191050.39]
 [144372.41 118671.85 383199.62 'New York' 182901.9