In [26]:
import pandas as pd 
import numpy as np
!pip install openpyxl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import ListedColormap
import os
%matplotlib inline
from scipy import stats

In [27]:
# Read the raw Excel workbook into df0 and print its column / dtype /
# non-null summary.
df0 = pd.read_excel('../input/datainput-ne/data-input (2).xlsx')
df0.info()

In [28]:
# Descriptive statistics of the raw frame (pandas defaults).
df0.describe()

In [29]:
# Remove the 'Emiten' and 'Net Profit Margin' columns from df0 in place.
# NOTE: this mutates df0, so re-running the cell without reloading the
# workbook raises KeyError (the columns are already gone).
df0.drop(columns=["Emiten", "Net Profit Margin"], inplace=True)

In [30]:
# Pairwise scatter matrix over columns 1..12 of df0.
# sns.pairplot builds its own figure, so a preceding plt.figure(figsize=...)
# has no effect on it and only leaves an empty figure in the output; it is
# therefore omitted. Use pairplot's `height`/`aspect` arguments to resize.
# The trailing ';' suppresses the PairGrid repr.
sns.pairplot(df0, vars=df0.columns[1:13]);

In [31]:
# Reload the workbook (df0 was modified in place above) and keep only the
# three clustering features plus the 'Category' column in df1.
df0 = pd.read_excel('../input/datainput-ne/data-input (2).xlsx')
df1 = df0.loc[:, ['DER', 'Return Of Asset', 'Sales Growth', 'Category']]
df1.describe()

In [32]:
# 'Return Of Asset' for every 'Emiten' value, on a wide figure with the
# x tick labels rotated 45 degrees.
plt.figure(figsize=(18, 4))
plt.xticks(rotation=45)
sns.scatterplot(data=df0, x='Emiten', y='Return Of Asset', s=80)

In [33]:
# 'DER' for every 'Emiten' value, on a wide figure with the x tick labels
# rotated 45 degrees.
plt.figure(figsize=(18, 4))
plt.xticks(rotation=45)
sns.scatterplot(data=df0, x='Emiten', y='DER', s=80)

In [34]:
# 'Sales Growth' for every 'Emiten' value, on a wide figure with the x tick
# labels rotated 45 degrees.
plt.figure(figsize=(18, 4))
plt.xticks(rotation=45)
sns.scatterplot(data=df0, x='Emiten', y='Sales Growth', s=80)

In [35]:
# Drop rows whose |z-score| is 3 or more in any of the listed columns and
# report the row count before and after.
print(f'Jumlah baris sebelum memfilter outlier: {len(df1)}')

# Boolean keep-mask: start from "keep everything" and AND in one condition
# per column, so a row survives only if it is an inlier in EVERY column.
# (Starting from all-False and OR-ing, as before, would keep a row that is
# an inlier in just one column once more columns are added to the list.)
filtered_entries = np.ones(len(df1), dtype=bool)
for col in ['Return Of Asset']:
    zscore = np.abs(stats.zscore(df1[col]))
    filtered_entries &= np.asarray(zscore < 3)

df1 = df1[filtered_entries]

print(f'Jumlah baris setelah memfilter outlier: {len(df1)}')

In [36]:
# Descriptive statistics of df1 after the outlier filter.
df1.describe()


In [37]:
from sklearn.preprocessing import StandardScaler

# Feature matrix X and label vector y as plain NumPy arrays.
feats = ['DER', 'Return Of Asset', 'Sales Growth']
X = df1[feats].to_numpy()
y = df1['Category'].to_numpy()

# Standardise each feature column, then wrap the result in a new DataFrame.
# new_df gets a fresh 0..n-1 index; it does not carry over df1's index.
X_std = StandardScaler().fit_transform(X)
new_df = pd.DataFrame(X_std, columns=feats)
new_df.describe()

In [38]:
from sklearn.cluster import KMeans

# Elbow method: fit KMeans for k = 1..6 on the standardised features and
# record the inertia of each fit.
k_values = list(range(1, 7))
inertia = []

for i in k_values:
    kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
    kmeans.fit(new_df.values)
    inertia.append(kmeans.inertia_)

# Plot inertia against the actual k. Plotting the bare list would put the
# points at x = 0..5, so the elbow would be read one cluster too low.
plt.figure(figsize=(12, 6))
plt.plot(k_values, inertia, marker='o')
plt.xticks(k_values)
plt.xlabel("Number of clusters (k)")
plt.ylabel("Inertia")
plt.title("Elbow Method")
plt.show()

In [39]:
# Fit the final model with 5 clusters on the standardised features.
# NOTE(review): k=5 presumably matches the five impact categories used
# further down — confirm against the elbow plot.
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=5, init='k-means++', max_iter=300, n_init=10, random_state=0)
kmeans.fit(new_df.values)

In [40]:
# Attach the original 'Category' values (y) and the KMeans cluster labels
# to the standardised frame. Both are arrays, so they are assigned by
# position rather than by index.
new_df['target'] = y
new_df['fit'] = kmeans.labels_

In [41]:
# Make 8x8 inches the default figure size for the rest of the notebook, then
# plot standardised DER against Return Of Asset coloured by 'target'.
plt.rcParams.update({"figure.figsize": (8, 8)})
sns.scatterplot(data=new_df, x='DER', y='Return Of Asset', hue=new_df['target'].to_list());

In [42]:
# Preview the first five rows (standardised features, target, fit).
new_df.head(5)

In [43]:
# Translate each KMeans cluster label ('fit') into a 0-4 category code and
# store it as the first column of new_df.
# NOTE(review): this mapping is hand-written and tied to the label numbering
# produced by the current fit; re-check it whenever the data or the KMeans
# configuration changes.
cluster_to_category = {0: 2, 1: 4, 2: 1, 3: 0, 4: 3}
new_df.insert(
    0,
    "Category",
    new_df['fit'].map(cluster_to_category).astype(int),
    allow_duplicates=True,
)
new_df.head(5)

In [44]:
# Remove the helper columns and relabel the remaining ones so the
# standardised features are marked with "(St.)".
new_df.drop(columns=["fit", "target"], inplace=True)
new_df.columns = [
    'Category',
    'DER (St.)',
    'Return Of Asset (St.)',
    'Sales Growth (St.)',
]
new_df.info()

In [45]:
from mpl_toolkits.mplot3d import Axes3D

# 3D scatter of the three standardised features, coloured by category.
x = new_df['DER (St.)']
y = new_df['Return Of Asset (St.)']
z = new_df['Sales Growth (St.)']

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')

# 256-step red -> yellow -> green colormap built from the seaborn palette.
cmap = ListedColormap(sns.color_palette("RdYlGn", 256).as_hex())
sc = ax.scatter(x, y, z, c=new_df['Category'], cmap=cmap, marker='o', s=40, alpha=1)

ax.set_xlabel("DER (St.)")
ax.set_ylabel("R Of Asset (St.)")
ax.set_zlabel("Sales Growth (St.)")

# Legend built from the scatter's colour-mapped values, placed outside the axes.
plt.legend(*sc.legend_elements(), bbox_to_anchor=(1.05, 1), loc=2)

plt.show()

In [46]:
# Fixed colour per category code; reused by the 2D scatter plots below.
palette = {
    0: "tab:red",
    1: "tab:orange",
    2: "yellow",
    3: "lime",
    4: "darkgreen",
}

# Standardised DER against standardised Sales Growth, coloured by category.
sns.scatterplot(data=new_df, x='DER (St.)', y='Sales Growth (St.)',
                hue='Category', palette=palette, s=80)
plt.title("DER vs Sales Growth (Standardisasi)", fontsize=18, y=1.05)
plt.show()

In [47]:
# Standardised Return Of Asset against standardised Sales Growth, coloured
# by category.
sns.scatterplot(data=new_df, x='Return Of Asset (St.)', y='Sales Growth (St.)',
                hue='Category', palette=palette, s=80)
plt.title("Return Of Asset vs Sales Growth (Standardisasi)", fontsize=18, y=1.05)
plt.show()

In [48]:
# Standardised DER against standardised Return Of Asset, coloured by category.
sns.scatterplot(data=new_df, x='DER (St.)', y='Return Of Asset (St.)',
                hue='Category', palette=palette, s=80)
plt.title("DER vs Return Of Asset (Standardisasi)", fontsize=18, y=1.05)
plt.show()

In [49]:
# Remove the 'ARTO' row from df0, renumber the index from 0, and drop the
# 'Category' and 'Net Profit Margin' columns.
#
# Written as a rebinding chain instead of in-place drops so the cell can be
# re-run safely: the row filter is a no-op once 'ARTO' is gone, and
# errors="ignore" tolerates columns that were already removed (the in-place
# version raised KeyError on a second run).
#
# NOTE(review): 'ARTO' is presumably the row removed by the z-score filter
# earlier, so that df0 lines up row-for-row with new_df in the concat
# below — confirm the row counts match.
df0 = (
    df0.loc[df0['Emiten'] != 'ARTO']
    .reset_index(drop=True)
    .drop(columns=["Category", "Net Profit Margin"], errors="ignore")
)

In [56]:
# Load the industry/emiten list and place it side by side with the cluster
# result (new_df) and the remaining raw columns (df0).
# pd.concat(axis=1) aligns on the index, so this assumes all three frames
# share the same 0..n-1 index in the same row order — TODO confirm that
# dftemp has the same rows in the same order.
dftemp = pd.read_excel('../input/indusemite/industry-emiten-list.xlsx')
df = pd.concat([dftemp, new_df, df0], axis=1)
df.head(5)

In [73]:
# Write the combined result to an Excel file, without the index column.
# NOTE(review): the target is under ../input, which is presumably a
# read-only dataset directory on hosted notebook platforms — verify the
# path is writable.
df.to_excel('../input/baruresult/newresult.xlsx', index = False)

In [59]:
# Cluster centroids of the fitted model, one row per cluster, in the
# standardised feature space the model was fitted on.
kmeans.cluster_centers_

In [60]:
from mpl_toolkits.mplot3d import Axes3D

# 3D scatter of the KMeans centroids in standardised feature space.
# The coordinates are read from the fitted model instead of being pasted in
# as literals, so the plot cannot go stale when the data or model changes.
# Columns follow the order the model was fitted on:
# DER, Return Of Asset, Sales Growth.
x, y, z = kmeans.cluster_centers_.T

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# All three axes are standardised values, so each label carries "(St.)".
ax.set_xlabel("DER (St.)")
ax.set_ylabel("Return Of Asset (St.)")
ax.set_zlabel("Sales Growth (St.)")
ax.scatter(x, y, z)
plt.show()

In [62]:
# Category 4 — "Highly Positively Impacted": descriptive statistics of the
# rows assigned to this category.
df[df['Category'] == 4].describe() 

In [61]:
# Category 3 — "Positively Impacted": descriptive statistics of the rows
# assigned to this category.
df[df['Category'] == 3].describe()

In [52]:
# Category 2 — "Neutrally Impacted": descriptive statistics of the rows
# assigned to this category.
df[df['Category'] == 2].describe()

In [53]:
# Category 1 — "Negatively Impacted": descriptive statistics of the rows
# assigned to this category.
df[df['Category'] == 1].describe()

In [54]:
# Category 0 — "Highly Negatively Impacted": descriptive statistics of the
# rows assigned to this category.
df[df['Category'] == 0].describe()