Permalink
Switch branches/tags
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
425 lines (422 sloc) 14.8 KB
# Country of birth: counts and percentage shares ----
#
# Works on the survey data frame `X`, which must already be in the workspace
# (this file never creates it). summary() is used for the counts, as in the
# original commands.
# NOTE(review): presumably `Country.of.birth` is a factor -- summary() only
# yields per-level counts for factors; confirm against the loading code.
#
# Help pages consulted interactively: ?data.entry, ?edit, ?barplot.
# The bare data.entry() call passed no variables to edit and is omitted.

# Which column(s) hold the country information?
names(X)[grep("Country", names(X))]

# Raw tabulation and the number of distinct values.
table(X$Country.of.birth)
length(table(X$Country.of.birth))

# Counts per country and each country's share of all answers, in percent.
# The multiplication by 100 is applied after the division; with it inside
# sum() every share came out 100 times too small instead of as a percentage.
country_counts <- summary(X$Country.of.birth)
country_share <- round(country_counts / sum(country_counts) * 100, 2)
country_counts
country_share
country_share[1]

# Bar charts of counts and shares. dev.new() opens the platform's default
# graphics device instead of the macOS-only quartz().
dev.new()
barplot(country_counts)
# ylim must be a length-two c(min, max) and is derived from the plotted
# percentages rather than from the raw counts.
barplot(country_share, ylim = c(0, max(country_share) + 5))

# The interactive session ended here with q(); it is left out so the file can
# be sourced past this point.
# Programming-language columns: extract and export ----
#
# `Languages` is the subset of `X` whose column names contain "Programming".
# It is built before it is summarised or written; the original commands used
# it (and a misspelled lower-case `languages`) ahead of the assignment.
# Help page consulted interactively: ?write.csv.

# Column names of the survey data, to find the language columns.
names(X)

Languages <- X[grep("Programming", names(X))]
summary(Languages)

# Written with row names, as write.csv() does by default.
write.csv(Languages, file = "lang.csv")
# Hierarchical clustering of the language answers ----
#
# Computes a binary distance on `Languages`, clusters it with complete and
# then average linkage, and writes the complete-linkage tree to cluster.pdf.
# Help consulted interactively: help.search("cluster"), ?dist, ?hclust,
# ?cutree, ?dendrogram.

Languages_distance <- dist(Languages, method = "binary")
summary(Languages_distance)
length(Languages_distance)
# A "dist" object cannot be indexed like a matrix; convert it first to look
# at the distances belonging to the first observation.
as.matrix(Languages_distance)[1, ]

# Complete linkage. hclust() takes the dissimilarities and the method; the
# data frame is not passed along (a third positional argument is `members`).
Languages_hclust <- hclust(Languages_distance, method = "complete")
print(Languages_hclust)

# On-screen plot. dev.new() opens the platform's default device instead of
# the macOS-only quartz().
dev.new()
plot(Languages_hclust)
dev.off()

# The same tree written to a PDF file.
pdf("cluster.pdf")
plot(Languages_hclust)
dev.off()

# Dendrogram view. There is no dendrogram() constructor; as.dendrogram()
# converts the hclust result. The dendrogram is a nested structure, so it is
# printed and summarised rather than indexed with [row, col].
Languages_dend <- as.dendrogram(Languages_hclust)
Languages_dend
summary(Languages_dend)

# Re-cluster with average linkage; `Languages_hclust` ends up holding this fit.
Languages_hclust <- hclust(Languages_distance, method = "average")
dev.new()
plot(Languages_hclust)
# Exploration of clustering packages and the Kulczynski distance ----
#
# Interactive trial-and-error kept verbatim. Several commands below are failed
# attempts (misspelled function or package names, undefined objects); they are
# annotated rather than removed.

# These two search the columns of X themselves for the pattern, not the column
# names; the corrected lookup against names(X) follows.
X[grep("Languages",X)]
X[grep("Programming",X)]
names(X)
grep("Programming",names(X))
X[grep("Programming",names(X))]
# Bare name typed before any package was attached, followed by help searches.
cclust
help.search('cclust')
help.search('cclust')
# `libary` is a misspelling; the next line is the working call.
libary(cluster)
library(cluster)
# Help lookups while searching for a clustering function.
?clclust
??clclust
?clclust
??clclust
?hclust
?kmeans
length(Languages_distance)
Languages
summary(Languages)
as.matrix(Languages)
# Binary distance on the matrix form of the language answers.
Languages.dist <- dist(as.matrix(Languages),method="binary")
Languages.dist
length(Languages.dist)
# Two misspelled attempts before the package name `prabclus` is reached.
libary(prabclust)
library(prabclust)
library(prabclus)
set.seed(2341)
# NOTE(review): `nb` is not defined anywhere in the visible file, and the
# argument is spelled `neighbourhood` here -- confirm against the prabinit()
# documentation. The call is retried below without it.
Lang.prab <- prabinit(prabmatrix=Languages,neighbourhood=nb)
Lang.prab <- prabinit(prabmatrix=Languages)
# `Lang` is not assigned anywhere in the visible file; retried with `Languages`.
kulczynski(Lang)
kulczynski(Languages)
Lang.dist <- kulczynski(Languages)
Lang.dist
# NOTE(review): Lang.dist is indexed as a two-dimensional object further down
# (Lang.dist[1,1]); if kulczynski() returns a plain matrix, hclust() presumably
# needs as.dist(Lang.dist) instead -- verify before relying on this fit.
Languages_hclust <- hclust(Lang.dist,method="average")
Lang.dist
Languages_hclust <- hclust(Lang.dist,method="average")
# Probing the size and shape of Lang.dist with single and double indices.
length(Lang.dist)
Lang.dist[1]
Lang.dist[2]
Lang.dist[3]
Lang.dist[1,1]
Lang.dist[1,2]
Lang.dist[2,1]
length(Lang.dist[1])
length(Lang.dist[1,])
length(Lang.dist[,1])
# Same clustering call with hclust()'s default method.
hclust(Lang.dist)
ls()
# k-means elbow plot for the language answers ----
#
# For k = 1..max_k, records the within-cluster sum of squares of a k-means
# fit on `mydata` and plots it against k.
# NOTE(review): kmeans() chooses random starting centres, so the curve can
# change between runs unless a seed is set beforehand.
# Help page consulted interactively: ?kmeans.

mydata <- Languages
nrow(mydata)

max_k <- 15

# Preallocated so every k has a slot. wss[1] is the one-cluster case, computed
# from the column variances; `mydata` is a data frame, so vapply() walks its
# columns without converting it to a matrix.
wss <- numeric(max_k)
wss[1] <- (nrow(mydata) - 1) * sum(vapply(mydata, var, numeric(1)))
for (i in 2:max_k) {
  wss[i] <- sum(kmeans(mydata, centers = i, iter.max = 20)$withinss)
}
wss
length(wss)

# `wss` is left holding the full curve; the original commands ended by
# overwriting it with the single one-cluster value.
plot(seq_len(max_k), wss, type = "b",
     xlab = "Number of Clusters",
     ylab = "Within groups sum of squares")
# Ward clustering of the language answers into six groups ----
#
# Clusters `mydata` on a binary distance, cuts the tree into six groups and
# stores the group number of every row in X$Prog.clusters.

d <- dist(mydata, method = "binary")

# hclust() renamed method "ward" to "ward.D" in R 3.1.0; the old spelling is
# still accepted but only with a message. The algorithm is the same.
fit <- hclust(d, method = "ward.D")
fit
# Plotted only after `fit` exists (the original commands tried it one line
# too early).
plot(fit)

groups <- cutree(fit, k = 6)
groups

# `mydata` is a column subset of `X` (via `Languages`), so the group vector
# lines up with the rows of `X`.
X$Prog.clusters <- groups
# Language usage within each programming cluster ----
#
# Summarises the "Programming..." columns of `X` overall and per value of
# X$Prog.clusters.
#
# Rows and columns are selected with the two-index form X[rows, cols].
# Joining the column positions and the row condition with `&`, `&&` or `%%`
# inside a single index (as the first attempts did) does not select anything
# meaningful, and `&&` rejects vectors longer than one in current R.
# Help consulted interactively: help("&"), ?nrow, ?factor, ?which.

prog_cols <- grep("^Programming", names(X))
names(X)[prog_cols]
length(prog_cols)
summary(X[prog_cols])

# One frequency table per language column.
lapply(X[prog_cols], table)

# Column totals over all rows.
vapply(X[prog_cols], sum, numeric(1))

# Number of rows in each cluster.
table(X$Prog.clusters)

# Per cluster: column totals, keeping only the columns with a total above 0.
for (cluster_id in sort(unique(X$Prog.clusters))) {
  members <- which(X$Prog.clusters == cluster_id)
  totals <- vapply(X[members, prog_cols, drop = FALSE], sum, numeric(1))
  cat("Cluster", cluster_id, "\n")
  print(totals[totals > 0])
}
# Clear the workspace, keeping only the survey data ----
#
# Removes every object except `X`.
# NOTE(review): this deletes whatever else is in the calling environment,
# including objects created earlier in this file; run it deliberately.

ls()

# setdiff() drops exactly the name "X" from the listing. It selects the same
# names as the negative look-ahead pattern "^(?!X$)" the original commands
# arrived at, without the regular expression.
rm(list = setdiff(ls(), "X"))

ls()
# Column totals of `lang`, sorted and exported ----
#
# `lang` is not created in this file and must already be in the workspace.
# NOTE(review): presumably a data frame of the language columns with numeric
# answers -- confirm where it is built.

names(X)
ls()
summary(lang)

# Total of every column, largest first. vapply() pins the result to one
# number per column.
lang_count <- sort(vapply(lang, sum, numeric(1)), decreasing = TRUE)
lang_count

# How many columns share each total.
table(lang_count)

# write.csv() takes the object first and the file second; the first attempt
# had the two swapped.
write.csv(lang_count, file = "lang_count.csv")
# Years of Perl programming: summary and plots ----

# The data frame is `X` (upper case).
names(X)

summary(X$Years.programming.Perl)
sd(X$Years.programming.Perl)

# Close whatever graphics devices are still open, then start a fresh one.
# graphics.off() replaces the repeated dev.off() calls, which raise an error
# once no device is left; dev.new() replaces the macOS-only quartz().
graphics.off()
dev.new()

# Four views of the same variable, each drawn over the previous one.
hist(X$Years.programming.Perl)
plot(X$Years.programming.Perl)
barplot(X$Years.programming.Perl)
boxplot(X$Years.programming.Perl)
# Income versus years of Perl programming ----

# Search the column names for the income variable (the second search was
# typed as `p(` instead of `grep(`).
grep("salary", names(X))
grep("Earn", names(X))
names(X)

summary(X$Income)
plot(X$Income)
plot(X$Income[X$Income != 0])
plot(X$Years.programming.Perl, X$Income)

# lm() needs a formula; passing the two vectors as separate arguments does
# not fit a model. Columns are referenced through `data = X` rather than
# with X$ inside the formula.
income_fit <- lm(Years.programming.Perl ~ Income, data = X)
income_fit
plot(income_fit)
summary(income_fit)

# NOTE(review): as.integer() on a factor returns the level codes, not the
# numbers printed in the labels. If Income is a factor, this slope is per
# level step -- confirm how Income is coded.
as.integer(X$Income)
summary(lm(Years.programming.Perl ~ as.integer(Income), data = X))
# CPAN-related columns: first look ----

# Names of every column whose name contains "CPAN".
cpan_cols <- grep("CPAN", names(X), value = TRUE)
cpan_cols

# Plot of the data frame made of those columns.
plot(X[cpan_cols])

# The first CPAN column, looked up once instead of repeating the nested
# X[[names(X[grep(...)])[1]]] expression on every line.
first_cpan <- X[[cpan_cols[1]]]
first_cpan
summary(first_cpan)
table(first_cpan)
plot(first_cpan)
hist(first_cpan)
barplot(first_cpan)
plot(summary(first_cpan))

# Column names are case-sensitive: the column is Contributed.to.CPAN.
plot(X$Contributed.to.CPAN)
summary(X$Contributed.to.CPAN)
table(X$Contributed.to.CPAN)
sum(table(X$Contributed.to.CPAN))
# CPAN contribution by gender ----
#
# Tabulates X$Contributed.to.CPAN separately for the rows where X$Gender is
# "male" and "female", as counts and as percentages within each gender.
# The abandoned attempts to wrap this in a function are dropped; the two
# tables are built directly.

table(X$Gender)

ct_male_contrib <- table(X$Contributed.to.CPAN[X$Gender == "male"])
ct_male_contrib
sum(ct_male_contrib)
ct_male_contrib / sum(ct_male_contrib) * 100

ct_female_contrib <- table(X$Contributed.to.CPAN[X$Gender == "female"])
ct_female_contrib
sum(ct_female_contrib)
ct_female_contrib / sum(ct_female_contrib) * 100

# The interactive session ended here with q(); it is left out so the file can
# be sourced past this point.
# Derive age from year of birth ----
#
# Age is the survey year minus X$Year.of.birth. Values below 14 or above 79
# are set to NA, and the result is stored as X$Age.
# NOTE(review): the original commands also tried source("03-age.R") and
# source("02-age.R"). Those files are not visible here; the derivation is
# written out in full below instead -- confirm nothing else in them is needed.

names(X)

# dev.new() opens the platform's default device (the original used the
# macOS-only quartz()).
dev.new()
hist(X$Year.of.birth)

# Defined before its first use; the first attempt computed Age before
# survey.year existed.
survey.year <- 2007 # or as.numeric(format(Sys.time(),"%Y")) for the next one

Age <- survey.year - X$Year.of.birth
# Both cut-offs in a single assignment.
Age[Age < 14 | Age > 79] <- NA

sd(Age, na.rm = TRUE)
hist(Age)

X$Age <- Age
# Age of CPAN contributors and module maintainers ----
#
# Compares X$Age across X$Contributed.to.CPAN and regresses it on
# X$CPAN.modules.maintained for several minimum module counts.
# na.rm = TRUE is used throughout because X$Age contains NA (out-of-range
# ages are blanked when Age is derived).

names(X)[grep("CPAN", names(X))]

hist(X$Age)
hist(X$Age[X$Contributed.to.CPAN == 1])

# Mean age where Contributed.to.CPAN is 1 and where it is 0.
mean(X$Age[X$Contributed.to.CPAN == 1], na.rm = TRUE)
mean(X$Age[X$Contributed.to.CPAN == 0], na.rm = TRUE)

# Mean age of the rows with more than `min_modules` maintained modules, for
# 0 to 5, instead of six copy-pasted lines.
maintained <- X$CPAN.modules.maintained
vapply(
  0:5,
  function(min_modules) mean(X$Age[maintained > min_modules], na.rm = TRUE),
  numeric(1)
)

# Both sides are subset with the same condition; the misspelled column
# `CPAN.modules.maintanied` produced vectors of different lengths.
length(maintained[maintained > 0])
length(X$Age[maintained > 0])

plot(Age ~ CPAN.modules.maintained, data = X,
     subset = CPAN.modules.maintained > 0)

# Regression of age on the number of maintained modules, restricted to rows
# above each threshold. `data =` and `subset =` keep response and predictor
# aligned.
for (min_modules in 0:3) {
  print(summary(lm(Age ~ CPAN.modules.maintained, data = X,
                   subset = CPAN.modules.maintained > min_modules)))
}

# The interactive session ended here with q(); it is left out so the file can
# be sourced to the end without closing R.