If you haven't installed dataprep yet, run the command `pip install dataprep` or execute the following cell.

In [None]:
# Optional: run this cell only if dataprep is not installed yet.
# The leading `!` passes the rest of the line to the system shell from the notebook.
!pip install dataprep

In [None]:
import pandas as pd
from dataprep.eda import plot, plot_correlation, plot_missing

## Load data

In [None]:
# Download the suicide-rate CSV over HTTPS and parse it into a DataFrame.
dataset_url = "https://s3-us-west-2.amazonaws.com/dataprep.dsl/datasets/suicide-rate.csv"
df = pd.read_csv(dataset_url)

## Plot the distribution of each column in the dataframe
For each numeric column, show a histogram. For each categorical column, show a bar chart.

In [None]:
# Convert "year" to the pandas categorical dtype before plotting, so that it is
# handled as a categorical column (bar chart) rather than a numeric one (histogram).
df["year"] = df["year"].astype("category")
# Overview: plot the distribution of every column in the dataframe.
plot(df)

# Show the plots for a given column. If the column is numeric, show a kernel density plot, a box plot and a normal Q-Q (qqnorm) plot.
If the column is categorical, show a bar plot and a pie plot.

In [None]:
# Single-column plots for "sex" (presumably a categorical column; verify against the data).
plot(df, "sex")
# Single-column plots for "gdp_per_capita" (presumably a numeric column; verify against the data).
plot(df, "gdp_per_capita")

# Show the plots of the relationship between two given columns.
* For numeric-categorical, show the box plot for each category.
* For numeric-numeric, show the heatmap
* For categorical-categorical, show the bar chart of col_x for each category of col_y

In [None]:
# Relationship between two columns; the plot type depends on the two column types.
# Intended as the numeric-categorical example (assumes "suicides" is numeric and "sex" categorical).
plot(df, "suicides", "sex")
# Intended as the numeric-numeric example (assumes both columns are numeric).
plot(df, "population", "suicides")
# Intended as the categorical-categorical example (assumes both columns are categorical).
plot(df, "country", "generation")

## Show correlation matrix plots for each method (pearson, kendall, spearman)
If k is specified, each matrix plot shows only the top-k positive cells and sets the color of all other cells to white. (Do you want to know the top-k negative cells as well?)

In [None]:
# Drop every column that contains at least one missing value, so the
# correlation plots run on columns with complete data only.
# `axis` is passed by keyword: pandas 2.0 made the arguments of
# DataFrame.dropna keyword-only, so `df.dropna('columns')` raises a TypeError there.
df_without_missing = df.dropna(axis="columns")
# Correlation matrices for all remaining columns.
plot_correlation(df_without_missing)
# Same matrices, restricted to the top-1 cell (k=1).
plot_correlation(df_without_missing, k=1)
# Same matrices, restricted to correlation values in the range (0, 1).
plot_correlation(df_without_missing, value_range=(0, 1))

# Show the column that corresponds to x in each of the three correlation matrices (pearson, kendall, spearman)
If k is specified, sort the results by correlation value and show, for each of the three columns, only the top-k correlation values.

In [None]:
# Correlation of the "suicides" column with the other columns.
plot_correlation(df_without_missing, "suicides")
# Same, limited to the top-2 correlation values (k=2).
plot_correlation(df_without_missing, "suicides", k=2)

# If value_range is specified, show only the correlation values that fall within value_range.

In [None]:
# Correlations of "suicides" restricted to values between -1 and 0.3.
plot_correlation(df_without_missing, "suicides", value_range=[-1, 0.3])

# If no correlation value falls within the range, show a blank figure.

In [None]:
# Range chosen to demonstrate the empty case: presumably no correlation of
# "suicides" lies between -1 and -0.8, so a blank figure is expected.
plot_correlation(df_without_missing, "suicides", value_range=[-1, -0.8])

In [None]:
# Correlation between two specific columns, passed as x and y.
# NOTE(review): this is the only call that names the column "suicides_no";
# every other cell uses "suicides". Confirm which name the dataset actually has.
plot_correlation(df_without_missing, x="population", y="suicides_no")
# Column-pair correlation for population vs. suicides with k=5
# (the effect of k for a column pair is not described in this notebook; check the dataprep docs).
plot_correlation(df_without_missing, x="population", y="suicides", k=5)


## Show the location/position and percentage of missing data

In [None]:
# Overview of where values are missing in the dataframe and what percentage is missing.
# num_bins=100 presumably sets how finely row positions are binned; confirm in the dataprep docs.
plot_missing(df, num_bins=100)

## If one wants to remove the rows in which x is missing,
show the impact of the removed rows on the other columns.

In [None]:
# Impact on the other columns of removing the rows where 'HDI_for_year' is missing.
plot_missing(df, 'HDI_for_year')

## If one wants to remove the rows in which x is missing, show the impact of the removed rows on column y.

In [None]:
# Impact on one specific column (second argument) of removing the rows
# where 'HDI_for_year' (first argument) is missing.
plot_missing(df, 'HDI_for_year', 'population')
plot_missing(df, 'HDI_for_year', 'sex')
plot_missing(df, 'HDI_for_year', "country")