# Using Google Colab with GitHub




[Google Colaboratory](http://colab.research.google.com) is designed to integrate cleanly with GitHub, allowing you both to load notebooks from GitHub and to save notebooks to GitHub.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Synthetic linear-regression demo: build a noise-free linear target from
# random features, then plot the target against each feature with a
# per-feature least-squares line.

# Set random seed for reproducibility
np.random.seed(42)

# Number of samples
n_samples = 1000

# Number of features
n_features = 5

# Generate random feature data
X = np.random.randn(n_samples, n_features)

# Build the target as an exact linear combination of the features (random
# coefficients and intercept, no noise term is added)
true_coefficients = np.random.randn(n_features)
true_intercept = np.random.randn()
y = np.dot(X, true_coefficients) + true_intercept

# Visualize the data.
# squeeze=False makes plt.subplots always return a 2-D array of Axes, so
# indexing keeps working when n_features == 1 (otherwise a bare Axes object
# is returned and axs[i] fails). ravel() restores the 1-D layout.
fig, axs = plt.subplots(n_features, 1, figsize=(8, 6), squeeze=False)
axs = axs.ravel()

for i, ax in enumerate(axs):
    feature = X[:, i]

    # Plot the target against the current feature
    ax.scatter(feature, y, alpha=0.5, label='Data')

    # Compute the single-feature best-fit line using numpy's polyfit function
    coefficients = np.polyfit(feature, y, deg=1)
    best_fit_line = np.polyval(coefficients, feature)

    # Plot the best-fit line; draw it in increasing-x order so the line is a
    # single left-to-right stroke rather than a zig-zag over unsorted points
    order = np.argsort(feature)
    ax.plot(feature[order], best_fit_line[order], color='red', label='Best Fit Line')

    ax.set_xlabel(f'Feature {i+1}')
    ax.set_ylabel('Target')
    ax.legend()

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

# Synthetic two-class classification demo: generate a 2-D dataset and draw
# one scatter series per class.

# Set random seed for reproducibility
np.random.seed(42)

# Number of samples
n_samples = 1000

# Number of features
n_features = 2

# Number of classes
n_classes = 2

# Number of clusters per class
n_clusters_per_class = 2

# Generate classification dataset.
# random_state is passed explicitly (as the other dataset cells in this
# notebook do) so the generated data does not depend on whatever has been
# drawn from NumPy's global RNG before this call.
X, y = make_classification(
    n_samples=n_samples,
    n_features=n_features,
    n_informative=n_features,
    n_redundant=0,
    n_classes=n_classes,
    n_clusters_per_class=n_clusters_per_class,
    random_state=42,
)

# Plot the data
plt.figure(figsize=(8, 6))

for class_value in np.unique(y):
    # Select data points of the current class
    X_class = X[y == class_value]

    # Plot the data points (one series per class so the legend lists classes)
    plt.scatter(X_class[:, 0], X_class[:, 1], label=f'Class {class_value}')

plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.title('Classification Dataset')
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs

# Synthetic clustering demo: generate blob-shaped clusters and show them in
# a single scatter plot coloured by the label returned with each sample.

# Seed NumPy's global RNG for reproducibility
np.random.seed(42)

# Dataset configuration
n_samples = 1000   # number of points to generate
n_features = 2     # dimensionality of each point
n_clusters = 4     # number of blob centers

# Generate clustering dataset (make_blobs is seeded through random_state)
blob_params = dict(
    n_samples=n_samples,
    n_features=n_features,
    centers=n_clusters,
    random_state=42,
)
X, y = make_blobs(**blob_params)

# Plot the data: first feature on the x-axis, second on the y-axis,
# colour taken from the label array
first_feature, second_feature = X[:, 0], X[:, 1]

plt.figure(figsize=(8, 6))
plt.scatter(first_feature, second_feature, c=y, cmap='viridis')
plt.title('Clustering Dataset')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.colorbar()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter

# Imbalanced-data demo: generate a two-class dataset with a 5% minority
# class, then show (a) SMOTE oversampling of the minority class and
# (b) random undersampling of the majority class, printing and plotting the
# class distribution at each step.

# Set random seed for reproducibility
np.random.seed(42)

# Number of samples
n_samples = 1000

# Number of features
n_features = 2

# Number of classes
n_classes = 2

# Number of clusters per class
n_clusters_per_class = 1

# Imbalance ratio (5% minority class)
imbalance_ratio = 0.05


def _plot_labelled_points(features, labels, title):
    """Scatter the first two feature columns, coloured by class label."""
    plt.figure(figsize=(8, 6))
    plt.scatter(features[:, 0], features[:, 1], c=labels, cmap='viridis')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.title(title)
    plt.colorbar()
    plt.show()


# Generate imbalanced classification dataset.
# The first weight applies to class 0, so class 0 is the minority here.
X, y = make_classification(
    n_samples=n_samples,
    n_features=n_features,
    n_informative=n_features,
    n_redundant=0,
    n_classes=n_classes,
    weights=[imbalance_ratio, 1 - imbalance_ratio],
    n_clusters_per_class=n_clusters_per_class,
    random_state=42
)

# Count the class distribution before oversampling
class_distribution_before = Counter(y)
print("Class distribution before oversampling:", class_distribution_before)

# Plot the data before oversampling
_plot_labelled_points(X, y, 'Data before oversampling')

# Derive the majority/minority labels from the observed counts instead of
# hard-coding them, so the cell stays correct if the weights are changed.
majority_class = max(class_distribution_before, key=class_distribution_before.get)
minority_class = min(class_distribution_before, key=class_distribution_before.get)

# Oversample the minority class using SMOTE
oversampler = SMOTE(random_state=42)
X_resampled, y_resampled = oversampler.fit_resample(X, y)

# Count the class distribution after oversampling
class_distribution_after = Counter(y_resampled)
print("Class distribution after oversampling:", class_distribution_after)

# Plot the data after oversampling
_plot_labelled_points(X_resampled, y_resampled, 'Data after oversampling')

# Undersample the majority class using RandomUnderSampler.
# This is applied to the ORIGINAL imbalanced data: the SMOTE output above is
# already balanced, and an under-sampler rejects a target count larger than
# the number of samples a class actually has. The majority class is reduced
# to three times the minority count, capped at its current size so the
# request is always valid.
target_majority_count = min(
    class_distribution_before[majority_class],
    3 * class_distribution_before[minority_class],
)
undersampler = RandomUnderSampler(
    sampling_strategy={majority_class: target_majority_count},
    random_state=42,
)
X_resampled, y_resampled = undersampler.fit_resample(X, y)

# Count the class distribution after undersampling
class_distribution_after_undersampling = Counter(y_resampled)
print("Class distribution after undersampling:", class_distribution_after_undersampling)

# Plot the data after undersampling
_plot_labelled_points(X_resampled, y_resampled, 'Data after undersampling')


In [None]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Iris preprocessing demo: load the dataset, inspect it as a DataFrame,
# split it into train/test partitions and standardize the features using
# statistics fitted on the training partition only.

# Load the dataset
iris = load_iris()

# Feature matrix goes to X, class labels go to y
X, y = iris.data, iris.target

# Wrap the features in a pandas DataFrame with the dataset's column names
df = pd.DataFrame(X, columns=iris.feature_names)

# Show the first 5 rows
print(df.head())

# Per-column count of missing values
print(df.isna().sum())

# Hold out 30% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Report the dimensions of both partitions
print("Training set dimensions:", X_train.shape)
print("Testing set dimensions:", X_test.shape)

# Fit the scaler on the training features only, then standardize them
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Reuse the training-set statistics to scale the test features
X_test_scaled = scaler.transform(X_test)

# Show the first 5 rows of the standardized training set
scaled_train_df = pd.DataFrame(X_train_scaled, columns=iris.feature_names)
print(scaled_train_df.head())


## Loading Public Notebooks Directly from GitHub

Colab can load public GitHub notebooks directly, with no required authorization step.

For example, consider the notebook at this address: https://github.com/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb.

The direct colab link to this notebook is: https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb.

To generate such links in one click, you can use the [Open in Colab](https://chrome.google.com/webstore/detail/open-in-colab/iogfkhleblhcpcekbiedikdehleodpjo) Chrome extension.

## Browsing GitHub Repositories from Colab

Colab also supports special URLs that link directly to a GitHub browser for any user/organization, repository, or branch. For example:

- http://colab.research.google.com/github will give you a general GitHub browser, where you can search for any GitHub organization or username.
- http://colab.research.google.com/github/googlecolab/ will open the repository browser for the ``googlecolab`` organization. Replace ``googlecolab`` with any other GitHub org or user to see their repositories.
- http://colab.research.google.com/github/googlecolab/colabtools/ will let you browse the main branch of the ``colabtools`` repository within the ``googlecolab`` organization. Substitute any user/org and repository to see its contents.
- http://colab.research.google.com/github/googlecolab/colabtools/blob/master will let you browse the ``master`` branch of the ``colabtools`` repository within the ``googlecolab`` organization. (Don't forget the ``blob`` here!) You can specify any valid branch for any valid repository.

## Loading Private Notebooks

Loading a notebook from a private GitHub repository is possible, but requires an additional step to allow Colab to access your files.
Do the following:

1. Navigate to http://colab.research.google.com/github.
2. Click the "Include Private Repos" checkbox.
3. In the popup window, sign in to your GitHub account and authorize Colab to read the private files.
4. Your private repositories and notebooks will now be available via the GitHub navigation pane.

## Saving Notebooks To GitHub or Drive

Any time you open a GitHub hosted notebook in Colab, it opens a new editable view of the notebook. You can run and modify the notebook without worrying about overwriting the source.

If you would like to save your changes from within Colab, you can use the File menu to save the modified notebook either to Google Drive or back to GitHub. Choose **File→Save a copy in Drive** or **File→Save a copy to GitHub** and follow the resulting prompts. Saving a Colab notebook to GitHub requires giving Colab permission to push the commit to your repository.

## Open In Colab Badge

Anybody can open a copy of any GitHub-hosted notebook within Colab. To make it easier to give people access to live views of GitHub-hosted notebooks,
Colab provides a [shields.io](http://shields.io/)-style badge, which appears as follows:

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb)

The markdown for the above badge is the following:

```markdown
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb)
```

The HTML equivalent is:

```HTML
<a href="https://colab.research.google.com/github/googlecolab/colabtools/blob/master/notebooks/colab-github-demo.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>
```

Remember to replace the notebook URL in this template with the notebook you want to link to.