## Quick Start with GIT in Colab

### Install the `GIT Clustering` Package

In [None]:
!pip install -i "https://test.pypi.org/simple/" git_cluster > /dev/null 
!pip install -U gdown > /dev/null
!gdown 1yNwCStP3Sdf2lfvNe9h0WIZw2OQ3O2UP && unzip datasets.zip > /dev/null

### Import the `GIT Clustering` Algorithm and Its Utilities

In [None]:
from git_cluster import GIT
from utils import matchY, measures_calculator, autoPlot
from dataloaders import Toy_DataLoader as Toy_DataLoader
from dataloaders import Real_DataLoader as Real_DataLoader

### Apply GIT on **Toy Datasets**

#### Circles

In [None]:
# Load the 'circles' toy dataset: samples X and their reference labels Y_true.
circles_loader = Toy_DataLoader(name='circles',
                                path="/content/datasets/toy_datasets")
X, Y_true = circles_loader.load()

# Cluster with GIT (k=12) and plot the samples together with the predictions.
circles_model = GIT(k=12)
Y_pred = circles_model.fit_predict(X)
autoPlot(X, Y_pred)

# matchY is applied before scoring — presumably it aligns predicted cluster ids
# with the reference labels; confirm against utils.matchY.
Y_pred, Y_true = matchY(Y_pred, Y_true)

# Score the prediction against the reference labels and show the result.
result = measures_calculator(Y_true, Y_pred)
result

#### Moons

In [None]:
# Load the 'moons' toy dataset: samples X and their reference labels Y_true.
moons_loader = Toy_DataLoader(name='moons',
                              path="/content/datasets/toy_datasets")
X, Y_true = moons_loader.load()

# Cluster with GIT (k=30) and plot the samples together with the predictions.
moons_model = GIT(k=30)
Y_pred = moons_model.fit_predict(X)
autoPlot(X, Y_pred)

# matchY is applied before scoring — presumably it aligns predicted cluster ids
# with the reference labels; confirm against utils.matchY.
Y_pred, Y_true = matchY(Y_pred, Y_true)

# Score the prediction against the reference labels and show the result.
result = measures_calculator(Y_true, Y_pred)
result

#### S-set

In [None]:
# Load the 's-set' toy dataset: samples X and their reference labels Y_true.
s_set_loader = Toy_DataLoader(name='s-set',
                              path="/content/datasets/toy_datasets")
X, Y_true = s_set_loader.load()

# Cluster with GIT (k=10), passing a target_ratio of fifteen equal entries.
equal_ratio = [1] * 15
s_set_model = GIT(k=10, target_ratio=equal_ratio)
Y_pred = s_set_model.fit_predict(X)

# Plot first, then run matchY — presumably it aligns predicted cluster ids with
# the reference labels; confirm against utils.matchY.
autoPlot(X, Y_pred)
Y_pred, Y_true = matchY(Y_pred, Y_true)

# Score the prediction against the reference labels and show the result.
result = measures_calculator(Y_true, Y_pred)
result

#### Smile

In [None]:
# Load the 'smile' toy dataset: samples X and their reference labels Y_true.
smile_loader = Toy_DataLoader(name='smile',
                              path="/content/datasets/toy_datasets")
X, Y_true = smile_loader.load()

# Cluster with GIT (k=20).
smile_model = GIT(k=20)
Y_pred = smile_model.fit_predict(X)

# Plot first, then run matchY — presumably it aligns predicted cluster ids with
# the reference labels; confirm against utils.matchY.
autoPlot(X, Y_pred)
Y_pred, Y_true = matchY(Y_pred, Y_true)

# Score the prediction against the reference labels and show the result.
result = measures_calculator(Y_true, Y_pred)
result

#### Impossible

In [None]:
# Read the 'impossible' toy dataset: samples X and their reference labels Y_true.
impossible_loader = Toy_DataLoader(name='impossible',
                                   path="/content/datasets/toy_datasets")
X, Y_true = impossible_loader.load()

# Build the GIT estimator with k=9 and a seven-entry target_ratio whose first
# entry is 2 and whose remaining six entries are 1.
impossible_ratio = [2] + [1] * 6
git = GIT(k=9, target_ratio=impossible_ratio)

# Run the algorithm on X to obtain one predicted cluster label per sample.
Y_pred = git.fit_predict(X)

# Draw the clusters and display their summary statistics (a DataFrame).
git.plot_and_summarize_clusters(X, Y_pred)

In [None]:
# Evaluate the 'impossible' prediction the same way as every other dataset in
# this notebook: run matchY on the labels before computing the measures.
# Presumably matchY aligns predicted cluster ids with the reference labels —
# confirm against utils.matchY. Fresh names are used so that re-running this
# cell always starts from the unmodified Y_pred / Y_true of the previous cell.
Y_pred_matched, Y_true_matched = matchY(Y_pred, Y_true)

result = measures_calculator(Y_true_matched, Y_pred_matched)
result

### Apply GIT on **Real Datasets**

#### Iris Dataset

In [None]:
# Load the 'iris' real dataset: samples X and their reference labels Y_true.
iris_loader = Real_DataLoader(name='iris',
                              path="/content/datasets/real_datasets")
X, Y_true = iris_loader.load()

# Cluster with GIT (k=15) and a three-entry target_ratio.
iris_model = GIT(k=15, target_ratio=[1., 1, 1])
Y_pred = iris_model.fit_predict(X)

# matchY is applied before scoring — presumably it aligns predicted cluster ids
# with the reference labels; confirm against utils.matchY.
Y_pred, Y_true = matchY(Y_pred, Y_true)

# Score the prediction against the reference labels and show the result.
result = measures_calculator(Y_true, Y_pred)
result

#### Hepatitis

In [None]:
# Load the 'hepatitis' real dataset: samples X and their reference labels Y_true.
hepatitis_loader = Real_DataLoader(name='hepatitis',
                                   path="/content/datasets/real_datasets")
X, Y_true = hepatitis_loader.load()

# Cluster with GIT (k=8) and a two-entry target_ratio.
hepatitis_model = GIT(k=8, target_ratio=[1., 1])
Y_pred = hepatitis_model.fit_predict(X)

# matchY is applied before scoring — presumably it aligns predicted cluster ids
# with the reference labels; confirm against utils.matchY.
Y_pred, Y_true = matchY(Y_pred, Y_true)

# Score the prediction against the reference labels and show the result.
result = measures_calculator(Y_true, Y_pred)
result