In [239]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [240]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from matplotlib.colors import ListedColormap
%matplotlib inline
from scipy import stats

!pip install openpyxl

In [241]:
# Load the dataset into dfx and preview its first rows.
# NOTE(review): a later cell re-reads 'realesatet.csv' from '../input/realestatefinance/'
# instead of '../input/catero/' - confirm which input directory is the intended one.
dfx = pd.read_csv('../input/catero/realesatet.csv')
dfx.head()

In [242]:
# Summary statistics of dfx as loaded (no filtering has been applied yet).
dfx.describe()

In [243]:
# Render the whole frame (not just the head) for visual inspection.
display(dfx)

In [244]:
# Working copy of dfx without the 'Code', 'Sharia' and 'Type of FS' columns.
dfc = dfx.drop(columns=['Code', 'Sharia', 'Type of FS'])
dfc

In [245]:
# sns.pairplot is a figure-level function: it always builds its own figure, so a
# preceding plt.figure(...) call would only leave an extra, empty figure behind.
# Pairwise relationships between the columns at positions 1-14 of dfc.
sns.pairplot(dfc, vars = dfc.columns[1:15]);

In [246]:
# Re-read the dataset and keep only the four columns of interest in dfc.
# NOTE(review): this path differs from the one used by the first load of
# 'realesatet.csv' ('../input/catero/') - confirm which one is intended.
dfx = pd.read_csv('../input/realestatefinance/realesatet.csv')
dfc = dfx.loc[:, ['Profit for the Period', 'Price to BV, x', 'EPS, IDR', 'Category']]
dfc.describe()

In [247]:
# 'Profit for the Period' per 'Code', drawn on a wide figure with slanted x tick labels.
plt.figure(figsize=(15, 4))
plt.xticks(rotation=45)
sns.scatterplot(data=dfx, x='Code', y='Profit for the Period', s=80)

In [248]:
# 'Price to BV, x' per 'Code', drawn on a wide figure with slanted x tick labels.
plt.figure(figsize=(15, 4))
plt.xticks(rotation=45)
sns.scatterplot(data=dfx, x='Code', y='Price to BV, x', s=80)

In [249]:
# 'EPS, IDR' per 'Code', drawn on a wide figure with slanted x tick labels.
plt.figure(figsize=(15, 4))
plt.xticks(rotation=45)
sns.scatterplot(data=dfx, x='Code', y='EPS, IDR', s=80)

In [250]:
# Remove outlier rows: a row is kept only if its absolute z-score is below 3 in
# every column listed in the loop. The two prints report the row count before
# and after filtering.
print(f'Jumlah baris sebelum memfilter outlier: {len(dfx)}')

# Start from an all-True mask and AND in each column's test. (Starting from
# all-False and OR-ing gives the same result for one column, but would keep a row
# that is an outlier in one column as soon as it is normal in another.)
filtered_entries = np.ones(len(dfx), dtype=bool)
for col in ['Price to BV, x']:
    zscore = np.abs(stats.zscore(dfx[col]))
    # np.asarray makes the comparison positional whatever type zscore() returned.
    filtered_entries = filtered_entries & np.asarray(zscore < 3)

# NOTE: dfx is overwritten, so re-running this cell filters the already
# filtered frame again.
dfx = dfx[filtered_entries]

print(f'Jumlah baris setelah memfilter outlier: {len(dfx)}')

In [251]:
# dfc was sliced from dfx before the outlier filter ran, so describing dfc here
# would only repeat the pre-filter statistics. Describe the same columns on the
# filtered dfx instead.
dfx[dfc.columns].describe()

In [252]:
from sklearn.preprocessing import StandardScaler

# Two clustering features and the existing 'Category' column as reference labels.
feats = ['Price to BV, x','EPS, IDR']
X = dfx.loc[:, feats].values
y = dfx['Category'].values

# Standardize the features and wrap the result in a fresh frame (new 0..n-1 index).
X_std = StandardScaler().fit_transform(X)
dfcs = pd.DataFrame(X_std, columns=feats)
dfcs.describe()

In [253]:
# KMEANS
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, normalize
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score

# Fit k-means for k = 1..10 on the current contents of dfcs and record the
# inertia of each fit.
cluster_counts = list(range(1, 11))
inertia = []

for i in cluster_counts:
    kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
    kmeans.fit(dfcs.values)
    inertia.append(kmeans.inertia_)

plt.figure(figsize=(12, 6))
plt.title('Goods Financial Data and Ratio')
plt.xlabel('No. of Cluster')
plt.ylabel('Inertia')
# Pass the cluster counts explicitly: plotting the bare list would place
# k = 1..10 at x = 0..9 and shift the elbow one position to the left.
plt.plot(cluster_counts, inertia) #Elbow Method
plt.xticks(cluster_counts)
plt.show()

In [254]:
from sklearn.cluster import KMeans

# Final model: two clusters with a fixed seed, fitted on the current contents of dfcs.
kmeans = KMeans(
    n_clusters=2,
    init='k-means++',
    n_init=10,
    max_iter=300,
    random_state=0,
)
kmeans.fit(dfcs.values)

In [255]:
# Append the reference labels ('target') and the k-means assignments ('fit') as new columns.
dfcs = dfcs.assign(target=y, fit=kmeans.labels_)

In [256]:
# Square default figure size, then the two standardized features coloured by 'target'.
plt.rcParams["figure.figsize"] = (8, 8)
sns.scatterplot(data=dfcs, x='Price to BV, x', y='EPS, IDR', hue=dfcs['target'].tolist());

In [257]:
# First five rows of dfcs (5 is the default for head()).
dfcs.head()

In [258]:
# Put the k-means assignment in front as an integer 'Category' column.
# Copying 'fit' directly works for any number of clusters; filling a NaN
# placeholder label by label left NaN (and made astype(int) fail) for every
# label that had no dedicated assignment line.
dfcs.insert(0, "Category", dfcs["fit"].astype(int), allow_duplicates=True)
dfcs.head(5)

In [259]:
# Discard the helper columns, then relabel the three remaining columns by position.
dfcs = dfcs.drop(columns=["fit", "target"])
dfcs.columns = ['Category','Price to BV, x (St.)','EPS, IDR (St.)']
dfcs.info()

In [261]:
from mpl_toolkits.mplot3d import Axes3D

# Scatter of the two standardized features, coloured by cluster ('Category').
# Only x and y are passed to the 3D axes, so no z coordinate is supplied.
fig = plt.figure()
ax = fig.add_subplot(111, projection = '3d')


x = dfcs['Price to BV, x (St.)']
y = dfcs['EPS, IDR (St.)']

# Axis labels name the columns that are actually plotted.
ax.set_xlabel("Price to BV, x (St.)")
ax.set_ylabel("EPS, IDR (St.)")

cmap = ListedColormap(sns.color_palette("RdYlGn", 256).as_hex())

sc = ax.scatter(x, y, s=40, c=dfcs['Category'], marker='o', cmap=cmap, alpha=1)
# One legend entry per distinct 'Category' value, placed outside the axes.
plt.legend(*sc.legend_elements(), bbox_to_anchor=(1.05, 1), loc=2)

plt.show()

In [262]:
# Remove every row whose 'Code' is 'ADES', renumber the index from 0,
# then discard the columns listed below.
index_names = dfx.loc[dfx['Code'] == 'ADES'].index
dfx = (
    dfx
    .drop(index=index_names)
    .reset_index(drop=True)
    .drop(columns=[
        "Category",
        "Assets, b.IDR",
        "Liabilities, b.IDR",
        "Equity, b.IDR",
        "EBT, b.IDR",
        "Profit for the Period",
        "Profit attr.to owner's",
        "EPS, IDR",
        "Book Value, IDR",
        "P/E Ratio, x",
        "Price to BV, x",
        "ROE, %",
        "NPM, %",
    ])
)

In [263]:
# pd.concat(axis=1) joins on index labels. dfcs and dfx are only related by row
# position, so their indexes must be identical; otherwise cluster labels would be
# attached to the wrong rows or padded with NaN (for example when rows were
# dropped from dfx after dfcs was built).
if not dfcs.index.equals(dfx.index):
    raise ValueError(
        f"dfcs ({len(dfcs)} rows) and dfx ({len(dfx)} rows) are not row-aligned; "
        "rebuild dfcs from the current dfx before concatenating"
    )
df_clus = pd.concat([dfcs, dfx], axis=1)
display(df_clus)

In [265]:
# Centroid coordinates of whichever model `kmeans` currently refers to
# (one row per cluster, one column per feature the model was fitted on).
kmeans.cluster_centers_

In [264]:
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
ax = fig.add_subplot(111, projection = '3d')

# Plot the centroids of the fitted model rather than hand-copied numbers, so the
# figure always matches `kmeans` (hard-coded coordinates go stale as soon as the
# data, the features or the number of clusters change).
# Assumes kmeans was fitted on [Price to BV, EPS] in that column order, as done
# when the standardized frame was built - TODO confirm if the features change.
centers = kmeans.cluster_centers_
x = centers[:, 0]
y = centers[:, 1]

ax.set_xlabel("Price to BV, x (St.)")
ax.set_ylabel("EPS, IDR (St.)")
ax.scatter(x, y)
plt.show()

In [None]:
# Rows assigned to cluster 1.
display(df_clus.loc[df_clus['Category'] == 1])

In [266]:
# Column names, non-null counts and dtypes of the combined frame.
df_clus.info()

In [269]:
# Rows assigned to cluster 0.
display(df_clus.loc[df_clus['Category'] == 0])

MULTINOMIAL LOGISTIC REGRESSION

In [267]:
# df_mlrx is bound to the same object as df_clus here (plain assignment, not a copy),
# so info() reports on df_clus itself.
df_mlrx = df_clus
df_mlrx.info()

In [272]:
# Modelling frame: df_clus without the identifier and raw ratio columns listed below.
df_mlrx = df_clus.drop(
    columns=['Code', 'Stock Name', 'Sales, b.IDR', 'D/E Ratio, x', 'ROA, %']
)
display(df_mlrx)

In [273]:
# Replace the numeric cluster codes with descriptive labels in one pass.
# Writing strings into an integer column through .loc is an incompatible-dtype
# assignment that pandas has deprecated; building a new column with map avoids it.
# Values without an entry in the table (including labels already converted on a
# re-run) are left unchanged.
category_labels = {
    0: "High Negatively",
    1: "Negatively",
    2: "Neutral",
    3: "Positively",
    4: "High Positively",
}
df_mlrx["Category"] = df_mlrx["Category"].map(
    lambda code: category_labels.get(code, code)
)

display(df_mlrx)

In [274]:
# Reminder: dataframe.iloc[:, start_col:end_col] selects columns by position (end_col is excluded)

In [275]:
# Predictors: the columns at positions 1-3 of df_mlrx.
# NOTE(review): this is positional - confirm that exactly these three columns
# are the intended (numeric) predictors.
X = df_mlrx.iloc[:, 1:4]
# Target: select the class label by name. A positional index counted from the end
# of the frame only lands on 'Category' when the frame has exactly four columns.
y = df_mlrx['Category']
print(X.head(5))
print(y.head(5))

In [277]:
# Hold out 30% of the rows for evaluation. A fixed random_state makes the split,
# and therefore every metric computed from it, reproducible across runs.
X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [279]:
# Fit a logistic-regression classifier on the training split.
# multi_class is left at its default: 'auto' was already the default value, and
# passing the argument explicitly triggers a deprecation warning in recent
# scikit-learn releases.
model = LogisticRegression()
model.fit(X_train, y_train)

In [280]:
# Class-membership probabilities for the first four test rows.
model.predict_proba(x_test)[:4]

In [281]:
# Predicted class label for every row of the test split; shown as the cell output.
predictions = model.predict(x_test)
predictions

In [282]:
# Per-class precision, recall and F1 on the test split.
print(classification_report(y_true=y_test, y_pred=predictions))

In [283]:
# Overall fraction of test rows classified correctly.
print(accuracy_score(y_true=y_test, y_pred=predictions))