# Single-cell RNA-seq imputation using DeepImpute

This tutorial walks through the main functionalities of DeepImpute.

In [None]:
from deepimpute.multinet import MultiNet
import pandas as pd

# Read the expression table with pandas; the first CSV column becomes the row index
data = pd.read_csv('test.csv', index_col=0)

# Rows are reported as cells and columns as genes
n_cells, n_genes = data.shape
print('Working on {} cells and {} genes'.format(n_cells, n_genes))

## Create a DeepImpute multinet

In [None]:
# Using default parameters
# No arguments are passed, so every setting keeps whatever default MultiNet defines.
multinet = MultiNet() 

In [None]:
# Using custom parameters
# Layer descriptions: one dense layer (ReLU, 200 neurons) followed by dropout (rate 0.3)
layers = [
    {"type": "dense", "activation": "relu", "neurons": 200},
    {"type": "dropout", "activation": "dropout", "rate": 0.3},
]

# Every entry below is forwarded to the MultiNet constructor as a keyword argument
NN_params = {
    'learning_rate': 1e-4,
    'batch_size': 64,
    'max_epochs': 200,
    'ncores': 5,
    'sub_outputdim': 512,
    'architecture': layers,
}

multinet = MultiNet(**NN_params)

## Fit the networks

In [None]:
# Using all the data
# cell_subset=1 is the setting used here to train on the whole dataset.
# NOTE(review): minVMR is presumably a minimum variance-to-mean ratio used to
# filter genes -- confirm against the DeepImpute documentation.
multinet.fit(data,cell_subset=1,minVMR=0.5)

In [None]:
# Using 80% of the data
# Here cell_subset is given as a fraction (0.8) of the dataset.
# NOTE(review): how the subset is drawn is not visible here -- confirm in the library.
multinet.fit(data,cell_subset=0.8)

In [None]:
# Using 200 cells (randomly selected)
# Here cell_subset is given as an absolute cell count (200) rather than a fraction.
multinet.fit(data,cell_subset=200)

In [None]:
# Custom fit: pick the training rows by hand instead of using cell_subset.
# Rows at positions 100 through 249 are kept, with every column.
trainingData = data.iloc[100:250]
multinet.fit(trainingData)

## Imputation

Imputation can be performed on any dataset, as long as its gene labels are the same as those of the data used for fitting.

In [None]:
# Run prediction on the full dataset and keep the result as imputedData.
imputedData = multinet.predict(data)

## Visualization

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Range shared by both axes and by the reference diagonal
limits = [0, 100]

fig, ax = plt.subplots()

# Standard-normal noise on the x-coordinates spreads overlapping points
# so the point density is easier to see
jitter = np.random.normal(0, 1, data.size)
x_values = data.values.flatten() + jitter
y_values = imputedData.values.flatten()
ax.scatter(x_values, y_values, s=2)

# Red dash-dot y = x line for reference
ax.plot(limits, limits, 'r-.', linewidth=2)
ax.set(xlim=limits, ylim=limits)

plt.show()


## Scoring

In [None]:
# Call score() on the dataset; as the last expression of the cell, its return
# value is what the notebook displays.
# NOTE(review): the metric score() computes is not visible here -- confirm in the library.
multinet.score(data)