In [1]:
# data

# Name-only lookup lists: each entry is a one-key dict {'name': <label>}.
company_name_list = [
    {'name': label}
    for label in ('Company 1', 'Company 2', 'Company 3')
]

employee_name_list = [
    {'name': label}
    for label in ('John Doe', 'Tom Smith', 'Andrew Sebastian')
]

# Company records keyed by company name. Each record repeats its own name,
# carries a business domain, and lists its clients as {'name', 'country'} dicts.
company_detail_list = {
    'Company 1': {
        'name': 'Company 1',
        'domain': 'Retail',
        'clients': [
            {'name': 'acme.inc', 'country': 'united states'},
            {'name': 'Wayne.co', 'country': 'united states'},
        ],
    },
    'Company 2': {
        'name': 'Company 2',
        'domain': 'Construction',
        'clients': [
            {'name': 'Tesla', 'country': 'united states'},
            {'name': 'Japan Airlines', 'country': 'japan'},
            {'name': 'Indofood', 'country': 'indonesia'},
        ],
    },
    'Company 3': {
        'name': 'Company 3',
        'domain': 'Healthcare',
        'clients': [
            {'name': 'Petronas', 'country': 'malaysia'},
            {'name': 'VW Group', 'country': 'germany'},
            {'name': 'IBM', 'country': 'united states'},
            {'name': 'Mitsubishi', 'country': 'japan'},
        ],
    },
}

# Employee records keyed by full name. Note that 'name' holds the employee ID
# (e.g. 'EMP-0001'), while 'company' holds a key of company_detail_list.
employee_detail_list = {
    'John Doe': {
        'name': 'EMP-0001',
        'first_name': 'John',
        'last_name': 'Doe',
        'full_name': 'John Doe',
        'company': 'Company 1',
    },
    'Tom Smith': {
        'name': 'EMP-0002',
        'first_name': 'Tom',
        'last_name': 'Smith',
        'full_name': 'Tom Smith',
        'company': 'Company 2',
    },
    'Andrew Sebastian': {
        'name': 'EMP-0003',
        'first_name': 'Andrew',
        'last_name': 'Sebastian',
        'full_name': 'Andrew Sebastian',
        'company': 'Company 2',
    },
}

In [2]:
def sort_company(companies=None):
    """Return every company's name and domain, ordered by domain descending.

    Args:
        companies: Optional mapping of company key -> detail dict that holds
            at least the ``'name'`` and ``'domain'`` keys. When omitted, the
            module-level ``company_detail_list`` is used, so existing
            zero-argument calls behave exactly as before.

    Returns:
        A new list of ``{'name': ..., 'domain': ...}`` dicts sorted by the
        ``'domain'`` string in reverse (Z to A) order. An empty mapping
        yields an empty list.

    Raises:
        KeyError: If a detail dict lacks ``'name'`` or ``'domain'``.
    """
    if companies is None:
        # Looked up at call time so the function sees the current global data.
        companies = company_detail_list

    summaries = [
        {'name': detail['name'], 'domain': detail['domain']}
        for detail in companies.values()
    ]

    # reverse=True gives descending domain order; sorted() stays stable, so
    # companies sharing a domain keep their original relative order.
    return sorted(summaries, key=lambda summary: summary['domain'], reverse=True)

# Build the domain-sorted company summary and print it as a plain list.
sorted_companies = sort_company()
print(sorted_companies)

[{'name': 'Company 1', 'domain': 'Retail'}, {'name': 'Company 3', 'domain': 'Healthcare'}, {'name': 'Company 2', 'domain': 'Construction'}]


In [3]:
def get_company_domain(companies=None):
    """Print one summary line per company: name, domain and client count.

    Each line has the form ``"<name>: <domain>, relation: <n> clients"``.

    Args:
        companies: Optional mapping of company key -> detail dict that holds
            the ``'name'``, ``'domain'`` and ``'clients'`` keys. When omitted,
            the module-level ``company_detail_list`` is used, so existing
            zero-argument calls behave exactly as before.

    Returns:
        None. The summary is written to standard output only.

    Raises:
        KeyError: If a detail dict lacks one of the required keys.
    """
    if companies is None:
        # Looked up at call time so the function sees the current global data.
        companies = company_detail_list

    for detail in companies.values():
        client_count = len(detail['clients'])
        print(f"{detail['name']}: {detail['domain']}, relation: {client_count} clients")
# Print the per-company summary; the function returns None, so no Out[] value is shown.
get_company_domain()

Company 1: Retail, relation: 2 clients
Company 2: Construction, relation: 3 clients
Company 3: Healthcare, relation: 4 clients


In [4]:
def get_employees(employees=None, companies=None):
    """Return each employee's full name, company and that company's domain.

    Args:
        employees: Optional mapping of employee key -> detail dict that holds
            the ``'full_name'`` and ``'company'`` keys. Defaults to the
            module-level ``employee_detail_list``.
        companies: Optional mapping of company key -> detail dict that holds
            the ``'domain'`` key. Defaults to the module-level
            ``company_detail_list``.

    Returns:
        A list of ``{'full_name', 'company', 'domain'}`` dicts, one per
        employee, in the iteration order of ``employees``.

    Raises:
        KeyError: If an employee references a company that is not present in
            ``companies``, or if a required key is missing from a record.
    """
    if employees is None:
        employees = employee_detail_list
    if companies is None:
        companies = company_detail_list

    return [
        {
            'full_name': employee['full_name'],
            'company': employee['company'],
            # Direct indexing on purpose: an unknown company is a data error
            # and should fail loudly rather than produce a silent blank domain.
            'domain': companies[employee['company']]['domain'],
        }
        for employee in employees.values()
    ]
# Join every employee with their company's domain and print the resulting list.
employees = get_employees()
print(employees)

[{'full_name': 'John Doe', 'company': 'Company 1', 'domain': 'Retail'}, {'full_name': 'Tom Smith', 'company': 'Company 2', 'domain': 'Construction'}, {'full_name': 'Andrew Sebastian', 'company': 'Company 2', 'domain': 'Construction'}]


In [5]:
def get_employees_by_company(employees=None, companies=None):
    """Group employee full names under the company they belong to.

    Every company in ``companies`` appears in the result, even when it has no
    employees. An employee whose company is missing from ``companies`` is
    still reported, under a new entry appended after the known companies,
    instead of aborting the whole grouping with a ``KeyError``.

    Args:
        employees: Optional mapping of employee key -> detail dict that holds
            the ``'full_name'`` and ``'company'`` keys. Defaults to the
            module-level ``employee_detail_list``.
        companies: Optional mapping whose keys are the company names to seed
            the grouping with. Defaults to the module-level
            ``company_detail_list``.

    Returns:
        A list of ``{'company': <name>, 'employees': [<full_name>, ...]}``
        dicts: known companies first (in ``companies`` order), then any
        unknown companies in order of first appearance.
    """
    if employees is None:
        employees = employee_detail_list
    if companies is None:
        companies = company_detail_list

    # Seed with every known company so that empty ones are listed too.
    grouped = {company_name: [] for company_name in companies}

    for employee in employees.values():
        # setdefault tolerates employees that point at an unlisted company.
        grouped.setdefault(employee['company'], []).append(employee['full_name'])

    return [
        {'company': company_name, 'employees': member_names}
        for company_name, member_names in grouped.items()
    ]

# Group employee names per company and print the resulting list of dicts.
employees_by_company = get_employees_by_company()
print(employees_by_company)

[{'company': 'Company 1', 'employees': ['John Doe']}, {'company': 'Company 2', 'employees': ['Tom Smith', 'Andrew Sebastian']}, {'company': 'Company 3', 'employees': []}]


In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer 

# Load the startup dataset; the path is relative to the notebook's working directory.
data_startup = pd.read_csv('UTS/50_Startups.csv')
# Fixed seed so the preview shows the same seven rows on every
# Restart & Run All instead of a different random sample each time.
data_startup.sample(7, random_state=42)

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
20,76253.86,113867.3,298664.47,California,118474.03
29,65605.48,153032.06,107138.38,New York,101004.64
40,28754.33,118546.05,172795.67,California,78239.91
24,77044.01,,140574.81,New York,108552.04
19,86419.7,153514.11,,New York,122776.86
21,78389.47,153773.43,299737.29,New York,111313.02
34,46426.07,157693.92,210797.67,California,96712.8


In [2]:
# Boolean mask of the whole frame: True marks a missing cell.
data_startup.isnull()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
5,False,False,False,False,False
6,False,False,False,False,False
7,False,True,False,False,False
8,False,False,False,False,False
9,False,False,False,False,False


In [3]:
# Mean-impute every numeric column that contains missing values.
missing_values = data_startup.isna().any()
# A mean only exists for numeric columns; text columns such as 'State' are
# skipped instead of raising inside .mean().
imputable_columns = data_startup.select_dtypes(include='number').columns

for column, is_missing in missing_values.items():
    if is_missing and column in imputable_columns:
        mean_value = data_startup[column].mean()
        # Assign the filled column back to the frame. The previous
        # `data_startup[column].fillna(..., inplace=True)` operates on a
        # column selection: pandas 2.x warns about it, and with Copy-on-Write
        # enabled it leaves `data_startup` unchanged.
        data_startup[column] = data_startup[column].fillna(mean_value)

print("\nData setelah pre-processing:")
# Fixed seed so the preview is reproducible across runs.
data_startup.sample(7, random_state=42)


Data setelah pre-processing:


Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
1,162597.7,151377.59,443898.53,California,191792.06
12,93863.75,127320.38,249839.44,Florida,141585.52
19,86419.7,153514.11,223582.690222,New York,122776.86
3,144372.41,118671.85,383199.62,New York,182901.99
41,27892.92,84710.77,164470.71,Florida,77798.83
5,131876.9,99814.71,362861.36,New York,156991.12
16,78013.11,121597.55,264346.06,California,126992.93


In [4]:
# One-hot encode the categorical 'State' column into one indicator column per state.
X = data_startup[['State']]
one_hot_encoder = OneHotEncoder()
# sparse_threshold=0 forces a dense NumPy array. OneHotEncoder produces sparse
# output by default, and ColumnTransformer keeps the result sparse whenever its
# density drops below the threshold (default 0.3). Three states give a density
# of 1/3, so the result is dense today only by a narrow margin; a fourth state
# would make it sparse and break the plain pd.DataFrame(X) conversion below.
ct = ColumnTransformer(
    [("OneHotEncoder", one_hot_encoder, [0])],
    remainder="passthrough",
    sparse_threshold=0,
)
X = ct.fit_transform(X)
df = pd.DataFrame(X)

print("\nData setelah one-hot encoding:")
df


Data setelah one-hot encoding:


Unnamed: 0,0,1,2
0,0.0,0.0,1.0
1,1.0,0.0,0.0
2,0.0,1.0,0.0
3,0.0,0.0,1.0
4,0.0,1.0,0.0
5,0.0,0.0,1.0
6,1.0,0.0,0.0
7,0.0,1.0,0.0
8,0.0,0.0,1.0
9,1.0,0.0,0.0


In [5]:
# Tax is 5% of (Profit + Marketing Spend + Administration), computed row by row.
tax_base = (
    data_startup['Profit']
    + data_startup['Marketing Spend']
    + data_startup['Administration']
)
data_startup['Tax'] = tax_base * 0.05
data_startup


Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit,Tax
0,165349.2,136897.8,471784.1,New York,192261.83,40047.1865
1,162597.7,151377.59,443898.53,California,191792.06,39353.409
2,153441.51,101145.55,407934.54,Florida,191050.39,35006.524
3,144372.41,118671.85,383199.62,New York,182901.99,34238.673
4,142107.34,91391.77,366168.42,Florida,166187.94,31187.4065
5,131876.9,99814.71,362861.36,New York,156991.12,30983.3595
6,134615.46,147198.87,127716.82,California,156122.51,21551.91
7,130298.13,122790.158298,323876.68,Florida,155752.6,30120.971915
8,120542.52,148718.95,311613.29,New York,152211.77,30627.2005
9,123334.88,108679.17,304981.62,California,149759.96,28171.0375


In [6]:
# Standardize the numeric features: the scaler learns each column's mean and
# standard deviation, then rescales the same data with them.
numeric_columns = [
    'R&D Spend',
    'Administration',
    'Marketing Spend',
    'Profit',
    'Tax',
]
numeric_data = data_startup.loc[:, numeric_columns]
scaler = StandardScaler()
x = scaler.fit(numeric_data).transform(numeric_data)

x

array([[ 2.07635100e+00,  5.54317531e-01,  2.29924826e+00,
         2.02429799e+00,  2.46094612e+00],
       [ 2.01184041e+00,  1.12325752e+00,  2.04092640e+00,
         2.01247181e+00,  2.36131091e+00],
       [ 1.79716796e+00, -8.50460062e-01,  1.70776898e+00,
         1.99380072e+00,  1.73704335e+00],
       [ 1.58453740e+00, -1.61816591e-01,  1.47863361e+00,
         1.78866964e+00,  1.62677025e+00],
       [ 1.53143145e+00, -1.23370564e+00,  1.32086272e+00,
         1.36790347e+00,  1.18856986e+00],
       [ 1.29157256e+00, -9.02751434e-01,  1.29022731e+00,
         1.13637905e+00,  1.15926614e+00],
       [ 1.35577977e+00,  9.59067227e-01, -8.88066812e-01,
         1.11451232e+00, -1.95209076e-01],
       [ 1.25455734e+00, -5.71773929e-16,  9.29087313e-01,
         1.10520006e+00,  1.03541639e+00],
       [ 1.02583113e+00,  1.01879422e+00,  8.15483696e-01,
         1.01606179e+00,  1.10811721e+00],
       [ 1.09129971e+00, -5.54449026e-01,  7.54050299e-01,
         9.54338943e-01