diff --git a/homework/homework3/goals.dat b/homework/homework3/goals.dat
new file mode 100644
index 0000000..3e8c505
--- /dev/null
+++ b/homework/homework3/goals.dat
@@ -0,0 +1,57 @@
+0,1872
+1,1874
+0,1876
+2,1878
+4,1880
+1,1882
+0,1884
+1,1886
+5,1888
+1,1890
+4,1892
+2,1894
+1,1896
+3,1898
+1,1900
+1,1904
+1,1906
+1,1908
+0,1910
+1,1912
+1,1914
+NA,1916
+NA,1918
+0,1921
+2,1923
+0,1925
+2,1927
+0,1929
+0,1931
+1,1933
+0,1935
+1,1937
+2,1939
+NA,1941
+NA,1943
+NA,1945
+2,1948
+1,1950
+2,1952
+4,1954
+1,1956
+4,1958
+1,1960
+0,1962
+0,1964
+4,1966
+1,1968
+0,1970
+1,1972
+0,1974
+1,1976
+1,1978
+2,1980
+1,1982
+1,1984
+0,1985
+0,1987
diff --git a/homework/homework3/hw3.py b/homework/homework3/hw3.py
new file mode 100644
index 0000000..9add6d8
--- /dev/null
+++ b/homework/homework3/hw3.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+"""Plot a Gaussian kernel density estimate of England's scores by year.
+
+Reads goals.dat (one "score,year" pair per line, "NA" marking a missing
+score), smooths the scores with a Gaussian kernel and plots the smoothed
+curve together with the raw scores.
+"""
+
+import math
+import matplotlib.pyplot as plt
+
+SQRT_2PI = math.sqrt(2.0 * math.pi)
+
+FILENAME = 'goals.dat'
+
+# Kernel bandwidth.  I like using 10 the best, the line is smoothed the most
+# and seems to show trends best; 1 is not smoothed much, 3 still seems too
+# messy.
+BANDWIDTH = 10
+
+
+def gaussian(x):
+    """Return the standard normal density at x."""
+    return math.exp(-0.5 * x * x) / SQRT_2PI
+
+
+def kernel(x, y, h, binpoints):
+    """Return one sample's contribution to every bin point.
+
+    A Gaussian of bandwidth h centred on x and scaled by y / h is evaluated
+    at each position in binpoints.
+    """
+    h = float(h)
+    weight = float(y) / h
+    return [weight * gaussian((b - x) / h) for b in binpoints]
+
+
+def read_scores(filename):
+    """Return (years, scores) read from filename, skipping 'NA' scores."""
+    years = []   # x-axis
+    scores = []  # y-axis
+    # open() replaces the Python-2-only file(); "with" closes the handle.
+    with open(filename) as datafile:
+        for line in datafile:
+            line = line.strip()
+            if not line:
+                continue  # tolerate blank lines, e.g. at end of file
+            score, year = line.split(",")
+            if score != 'NA':
+                scores.append(float(score))
+                years.append(float(year))
+    return years, scores
+
+
+def make_binpoints(low, high, nbins):
+    """Return nbins evenly spaced points from low to high.
+
+    Each point is computed from its index instead of by repeatedly adding
+    the step: accumulated rounding error could leave the last point unset,
+    and a zero step (all years equal) would run past the end of the list.
+    """
+    if nbins < 2:
+        return [low] * nbins
+    delta = (high - low) / float(nbins - 1)
+    return [low + j * delta for j in range(nbins)]
+
+
+def main():
+    """Load the data, build the KDE and plot it against the raw scores."""
+    years, scores = read_scores(FILENAME)
+    if not years:
+        raise SystemExit("no usable scores in " + FILENAME)
+
+    binpoints = make_binpoints(min(years), max(years), len(years))
+    masterbin = [0.0] * len(binpoints)
+    for year, score in zip(years, scores):
+        bins = kernel(year, score, BANDWIDTH, binpoints)
+        for j, value in enumerate(bins):
+            # Must be "+=": "=+" just assigns +value and the KDE never
+            # accumulates, so nothing sensible gets plotted.
+            masterbin[j] += value
+
+    plt.plot(binpoints, masterbin, 'b-')
+    plt.plot(years, scores, 'g-')
+    plt.xlabel("Year")
+    plt.ylabel("Score")
+    plt.title("England's Score")
+    plt.show()
+
+
+if __name__ == '__main__':
+    main()