Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Comparing changes

Choose two branches to see what's changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
base fork: sp0rus/PythonAndR
base: 05b3c6ced3
...
head fork: sp0rus/PythonAndR
compare: 528847c3ee
Checking mergeability… Don't worry, you can still create the pull request.
  • 3 commits
  • 7 files changed
  • 0 commit comments
  • 1 contributor
View
2  .gitignore
@@ -1 +1 @@
-/homework
+
View
3,194 homework/homework1/Tax_Year_2007_County_Income_Data.csv
3,194 additions, 0 deletions not shown
View
45 homework/homework1/hw1.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+
+#assign the file to a variable
+filename = "Tax_Year_2007_County_Income_Data.csv"
+
+#assign some variables for use later
+i = 0 #counter to skip the first line
+hagi = 0 # highest average gross income
+lagi = 1000000000000000 # lowest average gross income
+allagi = 0 # average gross income of all counties
+counties = 0 #number of counties (not including those with county code 0)
+
+#loop through the file
+for line in file(filename):
+ #skip the first line
+ if i != 0:
+ #clean the data and make each line into a list to parse
+ curline = line.strip()
+ curline = curline.replace("$","")
+ curlinelist = curline.split(",")
+
+ #loop through the current list
+ if curlinelist[1] != 0:
+ counties = counties + 1
+ agi = int(curlinelist[6])
+ pop = int(curlinelist[4])
+ curavegi = agi/pop
+ #compute the highest average gross income
+ if curavegi > hagi:
+ hagi = curavegi
+ hagic = curlinelist[3]
+ #compute the lowest average gross income
+ if curavegi < lagi:
+ lagi = curavegi
+ lagic = curlinelist[3]
+ #compute the sum of the average gross incomes of all counties to find national average
+ allagi = allagi + curavegi
+ i = i + 1
+
+#report highest average gross income by county
+print "The county with the highest aveage gross income is %s" % (hagic)
+#report lowest average gross income by county
+print "The county with the lowest average gross income is %s" % (lagic)
+#average of all counties by average gross income
+print "The average gross income of all counties is $%s" % ((allagi/counties)*1000)
View
3,194 homework/homework2/Tax_Year_2007_County_Income_Data.csv
3,194 additions, 0 deletions not shown
View
64 homework/homework2/hw2.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+import matplotlib.pyplot as plt
+
+#assign the file to a variable
+filename = "Tax_Year_2007_County_Income_Data.csv"
+
+averagegrossincome = []
+returns = []
+exemptions = []
+
+#assign some variables for use later
+i = 0 #counter to skip the first line
+counties = 0 #number of counties (not including those with county code 0)
+
+#loop through the file
+for line in file(filename):
+ #skip the first line
+ if i != 0:
+ #clean the data and make each line into a list to parse
+ curline = line.strip()
+ curline = curline.replace("$","")
+ curlinelist = curline.split(",")
+
+ #append the county's average gross income to the list
+ if curlinelist[1] != 0:
+ counties = counties + 1
+ agi = int(curlinelist[6])
+ pop = int(curlinelist[4])
+ averagegrossincome.append(agi/pop)
+ returns.append(pop)
+ exemptions.append(int(curlinelist[5]))
+ i = i + 1
+
+#plot the average gross income of the counties
+plt.plot(averagegrossincome)
+plt.show()
+
+raw_input()
+
+#plot the average gross income of the counties, sorted
+sortavegi = sorted(averagegrossincome)
+plt.plot(sortavegi)
+plt.show()
+
+#compute and plot the CDF
+cdf = [0] * len(sortavegi)
+cdf[0] = sortavegi[0]
+
+j = 1
+while j < len(sortavegi):
+ cdf[j] = sortavegi[j] + cdf[j-1]
+ j += 1
+
+raw_input()
+
+plt.plot(cdf)
+plt.show()
+
+raw_input()
+
+# plot the number of returns filed and number of exemptions
+plt.plot(returns, exemptions, "r.")
+plt.show()
View
57 homework/homework3/goals.dat
@@ -0,0 +1,57 @@
+0,1872
+1,1874
+0,1876
+2,1878
+4,1880
+1,1882
+0,1884
+1,1886
+5,1888
+1,1890
+4,1892
+2,1894
+1,1896
+3,1898
+1,1900
+1,1904
+1,1906
+1,1908
+0,1910
+1,1912
+1,1914
+NA,1916
+NA,1918
+0,1921
+2,1923
+0,1925
+2,1927
+0,1929
+0,1931
+1,1933
+0,1935
+1,1937
+2,1939
+NA,1941
+NA,1943
+NA,1945
+2,1948
+1,1950
+2,1952
+4,1954
+1,1956
+4,1958
+1,1960
+0,1962
+0,1964
+4,1966
+1,1968
+0,1970
+1,1972
+0,1974
+1,1976
+1,1978
+2,1980
+1,1982
+1,1984
+0,1985
+0,1987
View
60 homework/homework3/hw3.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+import math
+import matplotlib.pyplot as plt
+
+SQRT_2PI = math.sqrt(2.0 * math.pi)  # sqrt(2*pi), computed once; gaussian() divides by it
+
+def gaussian(x):
+ return math.exp(-0.5*x*x)/SQRT_2PI
+
+def kernel(x,y,h,binpoints):
+ bins = [0] * len(binpoints)
+ i = 0
+ for b in binpoints:
+ bins[i] = (float(y)/float(h))*gaussian((b-x)/float(h))
+ i += 1
+ return bins
+
+filename = 'goals.dat'
+
+years = [] #x-axis
+scores = [] #y-axis
+
+for line in file(filename):
+ line = line.strip()
+ [score, year] = line.split(",")
+ if score != 'NA':
+ scores.append(float(score))
+ years.append(float(year))
+
+n = len(years)
+nbins = n * 1
+low = min(years)
+high = max(years)
+
+binpoints = [0] * nbins
+masterbin = [0] * nbins
+
+delta = (high - low) / (nbins - 1)
+
+b = low
+i = 0
+while b <= high:
+ binpoints[i] = b
+ b += delta
+ i += 1
+
+for i in range(n):
+ # I like using 10 the best, the line is smoothed the most and seems to
+ # show trends best, 1 is not smoothed much, 3 still seems too messy
+ bins = kernel(years[i],scores[i], 10, binpoints)
+ for j in range(nbins):
+ masterbin[j] += bins[j] # I had "=+" here and could not get the KDE to plot, took forever to find the problem
+
+plt.plot(binpoints,masterbin,'b-')
+plt.plot(years, scores,'g-')
+plt.xlabel("Year")
+plt.ylabel("Score")
+plt.title("England's Score")
+plt.show()

No commit comments for this range

Something went wrong with that request. Please try again.