Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

making a proper sinatra app

  • Loading branch information...
commit d13cbdf1935ca49693ec934d5f097c57ce11deff 1 parent f0cd62d
@maxdemarzi authored
View
1  .gitignore
@@ -0,0 +1 @@
+neo4j
View
8 README.rdoc
@@ -4,3 +4,11 @@ An implementation of a graph based movie recommender engine by Marko Rodriguez.
http://markorodriguez.com/2011/09/22/a-graph-based-movie-recommender-engine
+
+Demo http://neoflix.heroku.com
+
+Deployment:
+
+ heroku create neoflix --stack cedar
+ heroku addons:add neo4j
+ git push heroku master
View
1  Rakefile
@@ -0,0 +1 @@
+require 'neography/tasks'
View
2  config.ru
@@ -0,0 +1,2 @@
+require './neoflix.rb'
+run Sinatra::Application
View
106 import_data.rb
@@ -0,0 +1,106 @@
+require 'rubygems'
+require 'neography'
+
+@neo = Neography::Rest.new(ENV['NEO4J_URL'] || "http://localhost:7474")
+
+# Load Genres
+@neo.create_node_index("genres_index", "fulltext","lucene")
+
+genres = %w[Action Adventure Animation Children's Comedy Crime Documentary Drama Fantasy Film-Noir Horror Musical Mystery Romance Sci-Fi Thriller War Western]
+
+batch_command = []
+
+genres.each_with_index do |g,i|
+ batch_command << [:create_node, {"name" => g}]
+ batch_command << [:add_node_to_index, "genres_index", "name", g, "{#{i * 2}}"]
+end
+
+genre_nodes = @neo.batch *batch_command
+
+genre_ids={}
+genre_nodes.each do |n|
+ genre_ids[n["body"]["data"]["name"]] = n["location"].split('/').last
+end
+
+# Load Occupations
+@neo.create_node_index("occupations_index", "fulltext","lucene")
+
+occupations = ["other" ,"academic/educator", "artist", "clerical/admin", "college/grad student",
+ "customer service", "doctor/health care", "executive/managerial", "farmer", "homemaker",
+ "K-12 student", "lawyer", "programmer", "retired", "sales/marketing", "scientist",
+ "self-employed", "technician/engineer", "tradesman/craftsman", "unemployed", "writer"]
+
+batch_command = []
+
+occupations.each_with_index do |g,i|
+ batch_command << [:create_node, {"name" => g}]
+ batch_command << [:add_node_to_index, "occupations_index", "name", g, "{#{i * 2}}"]
+end
+
+occupation_nodes = @neo.batch *batch_command
+
+occupation_ids=[]
+occupations.each_with_index do |n, i|
+ occupation_ids[i] = occupation_nodes[i * 2]["location"].split('/').last
+end
+
+# Load Movies
+@neo.create_node_index("movies_index", "fulltext","lucene")
+
+batch_command = []
+
+movies_file = File.new(File.dirname(__FILE__) +"/data/movies.dat", 'r')
+movies = IO.readlines(movies_file)
+
+movie_nodes = []
+counter = 0
+movies.each do |line|
+ movie = line.force_encoding("ISO-8859-1").split("::")
+ batch_command << [:create_node, {"type" => "Movie", "movieId" => movie[0].to_i, "title" => movie[1]}]
+ batch_command << [:add_node_to_index, "movies_index", "title", movie[1], "{#{counter}}"]
+
+ movie[2].split("|").each do |g|
+ batch_command << [:create_relationship, "hasGenre", "{#{counter}}", genre_ids[g.chomp] ]
+ end
+ counter = counter + 2 + movie[2].split("|").size
+
+ if counter > 100
+ movie_nodes << @neo.batch(*batch_command)
+ counter = 0
+ batch_command = []
+ end
+end
+
+movie_nodes << @neo.batch(*batch_command)
+#puts movie_nodes.last.inspect
+
+
+# Load Users
+@neo.create_node_index("users_index")
+
+batch_command = []
+
+users_file = File.new(File.dirname(__FILE__) +"/data/users.dat", 'r')
+users = IO.readlines(users_file)
+
+user_nodes = []
+counter = 0
+[users.first].each do |line|
+ user = line.force_encoding("ISO-8859-1").split("::")
+ batch_command << [:create_node, {"type" => "User", "userId" => user[0].to_i, "gender" => user[1], "age" => user[2].to_i}]
+ batch_command << [:add_node_to_index, "users_index", "userId", user[0].to_i, "{#{counter}}"]
+ batch_command << [:create_relationship, "hasOccupation", "{#{counter}}", occupation_ids[user[3].to_i] ]
+ puts batch_command.inspect
+ counter = counter + 3
+
+ if counter > 100
+ user_nodes << @neo.batch(*batch_command)
+ counter = 0
+ batch_command = []
+ puts "inserted 100 items to #{user_nodes.size} at #{Time.now} #{user_nodes.last.inspect}"
+ end
+end
+
+user_nodes << @neo.batch(*batch_command)
+
+puts user_nodes.inspect
View
80 neoflix.rb
@@ -1,27 +1,32 @@
require 'rubygems'
require 'neography'
+require 'sinatra'
-@neo = Neography::Rest.new(ENV['NEO4J_URL'] || "http://localhost:7474")
+Neography::Rest.default_options[:timeout] = 9000
-# Setup automatic Indexing on all vertices and all properties.
-# null => All keys or it can be a Set<String> of keys to automatically index
+neo = Neography::Rest.new(ENV['NEO4J_URL'] || "http://localhost:7474")
-@neo.execute_script("g.createAutomaticIndex('vertices', Vertex.class, null);")
+def create_graph(neo)
+ # do not recreate the graph if it already exists
+ return if neo.execute_script("g.idx('vertices')[[type:'Movie']].count();").to_i > 0
-# If the graph already existed prior to creating the AutomaticIndex,
-# then we reIndex all the vertices.
+ # Setup automatic Indexing on all vertices and all properties.
+ # null => All keys or it can be a Set<String> of keys to automatically index
-# @neo.execute_script("AutomaticIndexHelper.reIndexElements(g, g.idx('vertices'), g.V);")
+ if neo.execute_script("g.indices;").empty?
+ neo.execute_script("g.createAutomaticIndex('vertices', Vertex.class, null);")
-@neo.execute_script("g.setMaxBufferSize(1000);
- occupations = [0:'other', 1:'academic/educator', 2:'artist',
- 3:'clerical/admin', 4:'college/grad student', 5:'customer service',
- 6:'doctor/health care', 7:'executive/managerial', 8:'farmer',
- 9:'homemaker', 10:'K-12 student', 11:'lawyer', 12:'programmer',
- 13:'retired', 14:'sales/marketing', 15:'scientist', 16:'self-employed',
- 17:'technician/engineer', 18:'tradesman/craftsman', 19:'unemployed', 20:'writer'];")
+ # If vertices already existed prior to creating the AutomaticIndex,
+ # then we reIndex all the vertices.
+
+ neo.execute_script("AutomaticIndexHelper.reIndexElements(g, g.idx('vertices'), g.V);") if neo.execute_script("g.V.count();").to_i > 0
+ end
-@neo.execute_script("'https://raw.github.com/maxdemarzi/neoflix/master/data/movies.dat'.toURL().eachLine { def line ->
+ begin
+
+ neo.execute_script("g.setMaxBufferSize(1000);
+
+ 'https://raw.github.com/maxdemarzi/neoflix/master/data/movies.dat'.toURL().eachLine { def line ->
def components = line.split('::');
def movieVertex = g.addVertex(['type':'Movie', 'movieId':components[0].toInteger(), 'title':components[1]]);
components[2].split(/\|/).each { def genera ->
@@ -29,10 +34,51 @@ def hits = g.idx(Tokens.T.v)[[genera:genera]].iterator();
def generaVertex = hits.hasNext() ? hits.next() : g.addVertex(['type':'Genera', 'genera':genera]);
g.addEdge(movieVertex, generaVertex, 'hasGenera');
}
- }")
+ };
-@neo.execute_script("'https://raw.github.com/maxdemarzi/neoflix/master/data/ratings.dat'.toURL().eachLine {def line ->
+ occupations = [0:'other', 1:'academic/educator', 2:'artist',
+ 3:'clerical/admin', 4:'college/grad student', 5:'customer service',
+ 6:'doctor/health care', 7:'executive/managerial', 8:'farmer',
+ 9:'homemaker', 10:'K-12 student', 11:'lawyer', 12:'programmer',
+ 13:'retired', 14:'sales/marketing', 15:'scientist', 16:'self-employed',
+ 17:'technician/engineer', 18:'tradesman/craftsman', 19:'unemployed', 20:'writer'];
+
+ 'https://raw.github.com/maxdemarzi/neoflix/master/data/users.dat'.toURL().eachLine { def line ->
+ def components = line.split('::');
+ def userVertex = g.addVertex(['type':'User', 'userId':components[0].toInteger(), 'gender':components[1], 'age':components[2].toInteger()]);
+ def occupation = occupations[components[3].toInteger()];
+ def hits = g.idx(Tokens.T.v)[[occupation:occupation]].iterator();
+ def occupationVertex = hits.hasNext() ? hits.next() : g.addVertex(['type':'Occupation', 'occupation':occupation]);
+ g.addEdge(userVertex, occupationVertex, 'hasOccupation');
+ };
+
+ 'https://raw.github.com/maxdemarzi/neoflix/master/data/ratings.dat'.toURL().eachLine {def line ->
def components = line.split('::');
def ratedEdge = g.addEdge(g.idx(Tokens.T.v)[[userId:components[0].toInteger()]].next(), g.idx(T.v)[[movieId:components[1].toInteger()]].next(), 'rated');
ratedEdge.setProperty('stars', components[2].toInteger());
}")
+
+ puts "Loaded Data"
+
+ rescue Timeout::Error
+ puts "Creating the graph is going to take some time, watch it on #{ENV['NEO4J_URL'] || "http://localhost:7474"}"
+ end
+end
+
+create_graph(neo)
+
+get '/recreate_graph' do
+ neo.execute_script("g.clear();")
+ create_graph(neo)
+end
+
+get '/' do
+ puts "Indices: " + neo.execute_script("g.indices;").to_s + " should be [""AUTOMATIC[vertices:Vertex][autoIndexKeys:null]""]"
+ puts "Vertices: " + neo.execute_script("g.V.count();").to_s + " should be 9962"
+ puts "Edges: " + neo.execute_script("g.E.count();").to_s + " should be 1012657"
+ puts "Movies: " + neo.execute_script("g.idx('vertices')[[type:'Movie']].count();").to_s + " should be 3883"
+ puts "Genera: " + neo.execute_script("g.idx('vertices')[[type:'Genera']].count();").to_s + " should be 18"
+ puts "Users: " + neo.execute_script("g.idx('vertices')[[type:'User']].count();").to_s + " should be 6040"
+ puts "Occupations: " + neo.execute_script("g.idx('vertices')[[type:'Occupation']].count();").to_s + " should be 21"
+ puts "Genera: " + neo.execute_script("g.idx('vertices')[[type:'Genera']].map();").to_s + " should be 18"
+end
View
0  data/README → public/README
File renamed without changes
View
2  data/movies.dat → public/movies.dat
@@ -3880,4 +3880,4 @@
3949::Requiem for a Dream (2000)::Drama
3950::Tigerland (2000)::Drama
3951::Two Family House (2000)::Drama
-3952::Contender, The (2000)::Drama|Thriller
+3952::Contender, The (2000)::Drama|Thriller
View
2  data/ratings.dat → public/ratings.dat
@@ -1000206,4 +1000206,4 @@
6040::1094::5::956704887
6040::562::5::956704746
6040::1096::4::956715648
-6040::1097::4::956715569
+6040::1097::4::956715569
View
2  data/users.dat → public/users.dat
@@ -6037,4 +6037,4 @@
6037::F::45::1::76006
6038::F::56::1::14706
6039::F::45::0::01060
-6040::M::25::6::11106
+6040::M::25::6::11106
Please sign in to comment.
Something went wrong with that request. Please try again.