In [2]:
%load_ext autoreload
%autoreload 2

import plotly_scientific_plots as psp
import numpy as np

### Single Histogram

Plot a histogram of the data and automatically run statistical tests on the location of its mean (e.g. is the mean different from 0?). These include:
* W: Wilcoxon signed-rank test.
* T: T-test for the mean of ONE group of scores

The P-values from the tests are printed in the title

Note that the binning is done internally in numpy and thus this solution is MUCH faster than the plotly histogram functions

In [11]:
# Draw 800 samples from a standard normal distribution.
data_source_1 = np.random.randn(800)

# Plot the sample generated in this cell. The call previously referenced
# `data_source_2`, which is only created in the later "Two Histograms" cell, so
# on a fresh kernel (Restart & Run All) this cell raised NameError.
# NOTE(review): diff_tst=1 presumably enables the mean tests described in the
# markdown above -- confirm against the plotHist signature.
psp.plotHist(data_source_1, title='Dataset 1', diff_tst=1)

### Two Histograms

Plot two histograms. Automatically perform testing to see if the two distributions differ. Tests include:
* KS: Kolmogorov-Smirnov statistic on 2 samples.
* MW: Mann-Whitney rank test on two samples.
* T: T-test for the means of two independent samples of scores.

The P-values from the tests are printed in the title

In [10]:
# Two normally distributed samples of different sizes (800 and 200 points);
# the second sample is shifted by +0.35.
data_source_1 = np.random.randn(800)
data_source_2 = 0.35 + np.random.randn(200)

# Overlay both histograms with the KS, MW and T options switched on.
psp.plot2Hists(
    data_source_1,
    data_source_2,
    names=['Data 1', 'Data 2'],
    normHist=True,
    title='Comparison of 2 Data Sources',
    KS=True,
    MW=True,
    T=True,
)

### Correlation Scatter Plots

Plot the relationship between two variables and automatically perform statistical tests for correlation including:
* Pearson's correlation (R^2 and P-value)
* Spearman's correlation (R^2 and P-value)
* Linear model of the two variables

In [10]:
# x: 800 standard-normal samples.
# y: linear in x (slope 3, offset 4) plus normal noise scaled by 1.5.
data_source_1 = np.random.randn(800)
data_source_3 = 3 * data_source_1 + 4 + 1.5 * np.random.randn(800)

# Scatter y against x with the correlation options enabled.
psp.corrPlot(
    data_source_1,
    data_source_3,
    addCorr=True,
    addCorrLine=True,
    title='Correlation of x_var & y_var',
    xlbl='x_var label',
    ylbl='y_var label',
)

### Scatter+Contour Plot

In [34]:
# Uses data_source_1 / data_source_3, which must already exist from an earlier cell.
psp.scatterHistoPlot(
    data_source_1,
    data_source_3,
    title='Contour of x_var & y_var',
    xlbl='x_var label',
    ylbl='y_var label',
)

### Multiple data sources

Note that hovering over a correlation line will show its correlation statistics.

In [29]:


# Second x/y pair: y = 7*x + 1 minus normal noise scaled by 1.5.
data_source_11 = np.random.randn(800)
data_source_31 = 7 * data_source_11 + 1 - 1.5 * np.random.randn(800)

# Third x/y pair: y = -3*x - 2 plus normal noise scaled by 2.5.
data_source_12 = np.random.randn(800)
data_source_32 = -3 * data_source_12 - 2 + 2.5 * np.random.randn(800)

# Plot all three pairs together. data_source_1 / data_source_3 are not created
# here; they must already exist from an earlier cell.
psp.corrPlot(
    [data_source_1, data_source_11, data_source_12],
    [data_source_3, data_source_31, data_source_32],
    names=['datasetA', 'datasetB', 'datasetC'],
    addCorr=True,
    addCorrLine=True,
    title='Correlation of x_var & y_var',
    xlbl='x_var label',
    ylbl='y_var label',
)

### Paired Scatterplot 

Plot scatterplot of paired samples.

##### TODO: Add statistical testing to see if a paired difference exists (i.e. the same tests as in plotHist)

In [20]:
# Paired samples: y = 0.25*x + normal noise scaled by 0.75, shifted by +1.
# Note that this rebinds data_source_1 to a fresh draw.
data_source_1 = np.random.randn(800)
data_source_4 = 0.25 * data_source_1 + 0.75 * np.random.randn(800) + 1

# Scatter of the pairs with the x=y line option on and both correlation options off.
psp.corrPlot(
    data_source_1,
    data_source_4,
    names=['x_var', 'y_var'],
    addXYline=True,
    addCorr=False,
    addCorrLine=False,
    title='Paired comparison of x_var & y_var',
    xlbl='x_var label',
    ylbl='y_var label',
)

### Polar histogram
Make a histogram of polar data.

##### TODO: Add circular statistical testing for nonuniformity, peaks, and population differences 

In [22]:
# 100 angles drawn uniformly from [-pi, pi).
polar1 = np.random.uniform(low=-np.pi, high=np.pi, size=100)

# The data is passed as a one-element list.
psp.plotPolar(
    [polar1],
    numbins=20,
    title='Polar Distribution 1',
)

In [23]:
# Two uniform angle samples on [-pi, pi) with 100 and 200 points respectively.
polar1 = np.random.uniform(low=-np.pi, high=np.pi, size=100)
polar2 = np.random.uniform(low=-np.pi, high=np.pi, size=200)

# Keep the return value of plotPolar in `fig`.
fig = psp.plotPolar(
    [polar1, polar2],
    names=['polar1', 'polar2'],
    numbins=20,
    title='Polar Distribution Comparison',
)