# ChatGPT code debugging

# Example 1: Code has a bug

In [None]:
# import libraries
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats

## generate some data
# N paired observations. A holds random integers from 1 to 5 (randint's
# upper bound is exclusive). B is A plus integer noise from 1 to 4,
# rescaled by 4/9 and rounded up, so B is related to A but not identical.
N = 40
A = np.random.randint(low=1,high=6,size=N)
B = np.ceil( (A + np.random.randint(low=1,high=5,size=N) )/9 * 4 )

# compute the correlation
# kendalltau takes BOTH variables and returns a (statistic, p-value) pair;
# keep only the statistic so that cr is a scalar that '%g' can format.
cr = stats.kendalltau(A,B)[0]

# scatter B against A: black squares ('ks') of size 10 whose fill is black
# at 25% opacity
plt.plot( A, B, 'ks', ms=10, mfc=(0,0,0,.25) )
plt.grid()

# show the correlation value in the title
plt.title( '$r_k$ = %g' % cr )

plt.show()

# Example 2: Incomplete code

In [None]:
# import libraries
import numpy as np
import pandas as pd



## simulate three groups of normally distributed data for the ANOVA

# mean of each group
mean1,mean2,mean3 = 4,3.8,7

# number of samples drawn for each group
N1,N2,N3 = 30,35,29

# one standard deviation shared by all groups
stdev = 2


## draw the samples (group 1 first, then group 2, then group 3)
data1,data2,data3 = ( mu + stdev*np.random.randn(n)
                      for mu,n in ((mean1,N1),(mean2,N2),(mean3,N3)) )

# all samples in one long vector, ordered by group
datacolumn = np.hstack([data1,data2,data3])

# one string label per sample, in the same order as datacolumn
groups = [ label for label,n in zip('123',(N1,N2,N3)) for _ in range(n) ]



## long-format dataframe: one row per sample holding its value and its group
df = pd.DataFrame({'TheData':datacolumn,'Group':groups})


# run a 1-way ANOVA and report the results using the pingouin library


# make a boxplot of the results, showing one box for each data level



# Example 3: error-free code contains a statistical normalization error

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 500 random draws from a gamma distribution (shape 2, scale 2)
data = np.random.gamma(2,2,size=500)

# raw histogram values
# np.histogram returns the counts and the bin EDGES (one more edge than bins)
y,x = np.histogram(data,bins=40)
# replace the edges by the bin centers so that x and y have the same length
x = (x[:-1]+x[1:])/2

# normalize to percent
# divide by the TOTAL count (not the mean count) so the values sum to 100
yp = (100*y) / np.sum(y)

# two panels side by side: raw counts (left) and percentages (right)
_,axs = plt.subplots(nrows=1,ncols=2,figsize=(10,5))

# left panel: bin counts against bin centers, x-axis limited to the
# first and last bin centers
axs[0].plot(x,y,color='k',linewidth=3)
axs[0].set(ylabel='Counts',xlabel='Data values',xlim=x[[0,-1]])

# right panel: same curve after normalization
axs[1].plot(x,yp,color='k',linewidth=3)
axs[1].set(ylabel='Percentage',xlabel='Data values',xlim=x[[0,-1]])

plt.tight_layout()
plt.show()

# Example 4: Nothing wrong

In [None]:
# import libraries
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats

## log-normal distribution: pdf and cdf for two shape parameters

# grid of 1001 evenly spaced points from .001 to 5 on which to evaluate
x = np.linspace(start=.001,stop=5,num=1001)

# scipy pattern: stats.<distribution>.<function>(points, shape parameter)
# shape parameter s = 1
p1,c1 = stats.lognorm.pdf(x,s=1), stats.lognorm.cdf(x,s=1)

# shape parameter s = .1
p2,c2 = stats.lognorm.pdf(x,s=.1), stats.lognorm.cdf(x,s=.1)



# two stacked panels: pdfs on top, cdfs below
fig,ax = plt.subplots(nrows=2,ncols=1,figsize=(4,7))

# top panel: each pdf is divided by its own sum over the grid, so the
# plotted values add up to 1
ax[0].plot(x,p1/sum(p1))
ax[0].plot(x,p2/sum(p2))
ax[0].set(ylabel='probability',title='pdf(x)')

# bottom panel: the two cdfs as evaluated
ax[1].plot(x,c1)
ax[1].plot(x,c2)
ax[1].set(ylabel='probability',title='cdf(x)')
plt.show()