## 1. Data Upload

In [None]:
import pandas as pd
import numpy as np
from io import StringIO
from sklearn.manifold import MDS, TSNE
from sklearn.preprocessing import StandardScaler
from sklearn import datasets
from scipy.optimize import minimize

In [None]:
# Fetch the Iris data set bundled with scikit-learn and keep its parts
# under short module-level names used by the rest of the notebook.
iris = datasets.load_iris()
iris_features, iris_labels = iris.data, iris.target  # measurements / class ids
iris_target_names = iris.target_names  # species name for every class id

## 2. Multi-dimensional scaling

In [None]:
# Standardize the features before computing the embedding
scaler = StandardScaler()
X_scaled = scaler.fit_transform(iris_features)

# Apply MDS to reduce the dimensions to 2.
# MDS starts from a random configuration, so the seed is fixed here: the
# separating line used later in the notebook is written in hard-coded
# embedding coordinates and only makes sense if the embedding is the same
# on every run.
mds = MDS(n_components=2, normalized_stress='auto', random_state=42)
X_mds = mds.fit_transform(X_scaled)

# Apply t-SNE to reduce the dimensions to 2
# tsne = TSNE(n_components=2, random_state=42, n_iter=500)
# X_tsne = tsne.fit_transform(X_scaled)

# Convert the reduced data back to a pandas DataFrame:
# columns x1/x2 hold the embedding, 'labels' the class id of each sample
iris_reduced = pd.DataFrame(X_mds, columns=["x1", "x2"])
iris_reduced['labels'] = iris_labels


In [None]:
# Show the 2-D embedding (x1, x2) together with the class labels
iris_reduced

## 3. Visual analytics

In [None]:
def interpolate_coordinates(x_1, x_n, n):
    """
    Return ``n`` evenly spaced values running from ``x_1`` to ``x_n``.

    Both end points are part of the result; the i-th value (counting from 0)
    is ``x_1 + (x_n - x_1) * i / (n - 1)``.

    :param x_1: first value of the sequence
    :param x_n: last value of the sequence
    :param n: total number of values to produce, end points included
    :return: list with the ``n`` values, ordered from ``x_1`` to ``x_n``
    :raises ValueError: if ``n`` is not greater than 1
    """
    # With fewer than two points the step below would divide by zero
    if n <= 1:
        raise ValueError("n must be greater than 1 to calculate intermediate points.")

    span = x_n - x_1
    last_index = n - 1
    return [x_1 + (span * step) / last_index for step in range(n)]


In [None]:
# End points of the separating line, given in embedding coordinates (x1, x2)
x_1_first, x_2_first = 8, 4
x_1_last, x_2_last = 5, -4

# Number of points sampled on the line (end points included):
# one per column of the original feature matrix
num_inter_points = iris_features.shape[1]

# Interpolate each coordinate independently between its two end values
x_1_inter_coordinates, x_2_inter_coordinates = (
    interpolate_coordinates(start, stop, num_inter_points)
    for start, stop in ((x_1_first, x_1_last), (x_2_first, x_2_last))
)

# Collect the sampled points in a two-column frame, one row per point
coordinates_df = pd.DataFrame(
    list(zip(x_1_inter_coordinates, x_2_inter_coordinates)),
    columns=['x1', 'x2'],
)
coordinates_df


In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%config InlineBackend.figure_format = 'svg' 
plt.style.use('ggplot')

# Base of the MultipleLocator below, i.e. the distance between two major
# ticks in data units (despite the name, not a number of ticks)
NUM_TICKS = 2

# One colour per species, indexed by class id
colors = ['red', 'green', 'blue']

# Open the figure and put a major tick at every multiple of NUM_TICKS
plt.figure(figsize=(10, 8))
ax = plt.gca()
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_locator(ticker.MultipleLocator(NUM_TICKS))

# One scatter series per species; every point is tagged with its class id
for class_id, species in enumerate(iris_target_names):
    subset = iris_reduced[iris_reduced['labels'] == class_id]
    plt.scatter(subset['x1'], subset['x2'], color=colors[class_id], label=species)

    for point_x1, point_x2 in zip(subset['x1'], subset['x2']):
        plt.annotate(str(class_id), (point_x1, point_x2), textcoords="offset points", xytext=(6, -6), ha='center', fontsize=8)

# Separating line: a black polyline through the points in coordinates_df
line_x1, line_x2 = (coordinates_df[column].tolist() for column in ('x1', 'x2'))
plt.plot(line_x1, line_x2, 'k-')

# Mark the sampled points of the line with black dots
plt.scatter(line_x1, line_x2, color='k', marker='o', edgecolor='k', label='Line Points')

# Axis labels, title and legend, then render
plt.xlabel('X1')
plt.ylabel('X2')
plt.title('MDS of Iris Dataset')
plt.legend()
plt.show()

## 4. Calculate Transition Matrix

### 4.1. Singular Value Decomposition (SVD)

In [None]:
# Embedding coordinates only (label column removed), as a NumPy array
formal_model_reduced_values = iris_reduced.drop(columns='labels').values

# Full SVD: s holds the singular values as a 1-D vector
U, s, Vt = np.linalg.svd(formal_model_reduced_values)

# Place the singular values on the main diagonal of a zero matrix that has
# the shape of the input, so that U, S and Vt can be multiplied back together
S = np.zeros(formal_model_reduced_values.shape)
n_singular = len(s)
S[:n_singular, :n_singular] = np.diag(s)


In [None]:
# Show the matrix that was decomposed (embedding coordinates without labels)
formal_model_reduced_values

### 4.2. Create the reconstructed matrix of SVD

In [None]:
# Multiply the three SVD factors back together; the product should
# reproduce the decomposed matrix up to floating-point error
reconstructed_matrix = U @ (S @ Vt)

# Wrap the result in a DataFrame for display
reconstructed_df = pd.DataFrame(data=reconstructed_matrix)


In [None]:
# Show the matrix rebuilt from the SVD factors
reconstructed_df

In [None]:
# Show the embedding again, for visual comparison with the reconstruction
iris_reduced

### 4.3. Calculate the generalized inverse of input matrix

In [None]:
# Moore-Penrose pseudo-inverse of the embedding matrix
# (rows and columns are swapped relative to the input)
formal_model_reduced_pinv = np.linalg.pinv(formal_model_reduced_values)

# DataFrame wrapper, used for display and for the matrix product that follows
formal_model_reduced_pinv_df = pd.DataFrame(data=formal_model_reduced_pinv)

In [None]:
# Show the pseudo-inverse of the embedding matrix
formal_model_reduced_pinv_df

### 4.4. Calculate transition matrix T

In [None]:
# T = pinv(embedding) @ features: the least-squares linear map from the
# 2-D embedding coordinates to the original (unscaled) feature space
transition_matrix_T = formal_model_reduced_pinv_df @ iris_features

In [None]:
# Show the transition matrix (one row per embedding axis)
transition_matrix_T

## 5. Create Hyperplane

In [None]:
# Map the points of the separating line from the embedding into feature
# space via T. Despite the "_df" suffix the result is a plain NumPy array.
coordinates_hyperplane_df = np.matmul(
    coordinates_df.to_numpy(),
    transition_matrix_T.to_numpy(),
)

In [None]:
# Show the line points after mapping them through the transition matrix
coordinates_hyperplane_df

In [None]:
# Prepend a column of ones to the mapped points; its weight (w0) acts as the
# constant term of the hyperplane
new_column = np.ones((len(coordinates_hyperplane_df), 1))
feature_vector_inverse = np.concatenate([new_column, coordinates_hyperplane_df], axis=1)

feature_vector_inverse

In [None]:
# Objective minimized when fitting the hyperplane weights
def optimization_criteria(W, X):
    """
    Return the sum of the absolute values of ``X . W``.

    :param W: weight vector, one entry per column of ``X``
    :param X: matrix whose rows are the points the hyperplane should pass through
    :return: sum over all rows of ``|row . W|``; 0 when every row lies on the
        hyperplane defined by ``W``
    """
    projections = X.dot(W.T)
    return np.sum(np.abs(projections))

# One unknown per column of the design matrix (bias column included)
n_weights = feature_vector_inverse.shape[1]

# Starting point of the search: the all-zero weight vector
weights_inverse_initial = np.zeros(n_weights)

# Every weight is restricted to the interval [-1, 1]
bounds = [(-1, 1)] * n_weights

# NOTE(review): W = 0 already gives an objective of exactly 0, the smallest
# value a sum of absolute values can take, and it is also the starting point.
# Confirm the optimizer returns a non-trivial W; otherwise a normalization
# constraint on W is probably needed.
res = minimize(
    fun=optimization_criteria,
    x0=weights_inverse_initial,
    args=(feature_vector_inverse,),
    method='SLSQP',
    bounds=bounds,
    options=dict(disp=True),
)

# Report the outcome.
# NOTE(review): weights_inverse_output is only created on success, so code
# that reads it afterwards fails with a NameError if the optimizer fails.
if not res.success:
    print("Optimization failed.")
else:
    # Label the weights w0, w1, ... in the order of the design-matrix columns
    weights_inverse_output = pd.DataFrame(
        {'weight': res.x},
        index=[f'w{k}' for k in range(res.x.size)],
    )

    print("Optimization was successful. The weights are:")
    print(weights_inverse_output)

    # Final value of the objective function
    print("\nValue of the optimization criteria (objective function value):", res.fun)

# Termination message reported by the optimizer (printed in both cases)
print("\nConvergence status:", res.message)


In [None]:
# Show the fitted weights (w0 is the weight of the ones column)
weights_inverse_output

In [None]:
# Move w0 (first row) behind the other weights and renumber the rows from 0
bias_row = weights_inverse_output.iloc[:1]
feature_rows = weights_inverse_output.iloc[1:]
weights_df_reordered = pd.concat([feature_rows, bias_row], ignore_index=True)

# Show the reordered weights
weights_df_reordered


In [None]:
# Original features as a DataFrame, with a trailing constant column 'ones'
# (same column order as the reordered weights: features first, constant last)
new_formal_model_features = pd.DataFrame(
    iris_features,
    columns=iris.feature_names,
).assign(ones=1)
new_formal_model_features


In [None]:
# Scale every column by its weight (weights are matched to columns by position)
weight_vector = weights_df_reordered['weight'].values
my_result = new_formal_model_features.mul(weight_vector, axis='columns')

# Add the weighted columns up row by row: one score per sample
my_predictions = my_result.sum(axis='columns')

# One-column frame holding the scores
my_predictions_df = pd.DataFrame({'Prediction': my_predictions})

In [None]:
# Show the per-sample scores
my_predictions_df

In [None]:
# Class ids as a one-column frame of their own
iris_labels_df = pd.DataFrame(iris_labels, columns=['labels'])

# Attach the class ids to the scores. The column is assigned rather than
# concatenated so that re-running this cell overwrites 'labels' instead of
# appending a second column with the same name (which would break the
# groupby('labels') used further down).
my_predictions_df = my_predictions_df.assign(labels=iris_labels_df['labels'].values)


In [None]:
# Show the scores next to the class ids
my_predictions_df

In [None]:
# Save the scores and labels as CSV without the index column.
# Raw string: the path contains backslashes ("\p", "\m") that are not valid
# escape sequences and trigger a SyntaxWarning on recent Python versions;
# the resulting path is unchanged.
my_predictions_df.to_csv(r'.\projects_temp-data\my_predictions_df_iris.csv', index=False)

In [None]:
def count_positives_negatives(df):
    """
    Summarize the sign of the 'Prediction' values for every value of 'labels'.

    :param df: DataFrame with 'labels' and 'Prediction' columns
    :return: DataFrame with one row per label and the columns 'labels',
        'positive_count' (predictions > 0), 'negative_count' (predictions < 0)
        and 'sum' (number of non-missing predictions in the group; values
        equal to zero are counted here only)
    """
    def _n_positive(values):
        return (values > 0).sum()

    def _n_negative(values):
        return (values < 0).sum()

    def _n_total(values):
        return values.count()

    # Aggregate the predictions of each label with the three counters above
    per_label = df.groupby('labels')['Prediction']
    summary = per_label.agg(
        positive_count=_n_positive,
        negative_count=_n_negative,
        sum=_n_total,
    )

    # Turn the group key back into an ordinary column
    return summary.reset_index()

# Count positive and negative scores per class label and show the summary
result_df = count_positives_negatives(my_predictions_df)
result_df