-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d609377
commit 4acbfd4
Showing
24 changed files
with
978 additions
and
0 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
from elasticsearch import Elasticsearch | ||
import logging | ||
import json | ||
import codecs | ||
|
||
def connect_elasticsearch():
    """Create a client for the local Elasticsearch node and report reachability.

    Builds a client for ``localhost:9200`` and prints ``'Connected'`` when
    the node answers a ping, ``'failed to connect'`` otherwise.

    Returns:
        The ``Elasticsearch`` client. It is returned even when the ping
        fails, so callers that need a live node must check for themselves.
    """
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    if es.ping():
        print('Connected')
    else:
        print('failed to connect')
    return es
|
||
def create_index(es_object, index_name):
    """Create the index with the song field mapping unless it already exists.

    Args:
        es_object: Elasticsearch client; only ``indices.exists`` and
            ``indices.create`` are used.
        index_name: Name of the index to create.

    Returns:
        True when this call created the index. False when the index already
        existed or when the client raised (the error text is printed).
    """
    # index settings
    settings = {
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        },
        "mappings": {
            "properties": {
                "songs": {
                    "properties": {
                        "title": {"type": "keyword"},
                        "singer": {"type": "keyword", "store": True},
                        "composer": {"type": "keyword", "store": True},
                        "music": {"type": "keyword", "store": True},
                        "lyrics": {"type": "text"},
                        "beat": {"type": "keyword", "store": True},
                        "key": {"type": "keyword", "store": True},
                        # NOTE(review): "gerne" looks like a misspelling of
                        # "genre". Left unchanged because indexed documents
                        # or queries may depend on this exact key - confirm.
                        "gerne": {"type": "text", "store": True},
                        "ratings": {"type": "float", "store": True},
                    }
                }
            }
        },
    }
    try:
        # Keyword form is accepted by both older and newer client releases.
        if es_object.indices.exists(index=index_name):
            return False
        es_object.indices.create(index=index_name, body=settings)
    except Exception as ex:
        # Best effort: report the failure and tell the caller nothing was
        # created. No `finally: return`, so KeyboardInterrupt and friends
        # still propagate.
        print(str(ex))
        return False
    return True
|
||
|
||
def store_record(elastic_object, index_name, record):
    """Index one document, reporting (not raising) any failure.

    Args:
        elastic_object: Elasticsearch client; only ``index`` is used.
        index_name: Index that receives the document.
        record: Document body to index.

    Returns:
        The client's response to the index request, or None when the
        request raised (the error text is printed).
    """
    try:
        return elastic_object.index(index=index_name, body=record)
    except Exception as ex:
        print('Error in indexing data')
        print(str(ex))
        return None
|
||
|
||
def search(es_object, index_name, search):
    """Run a query body against an index and return the client's response.

    Args:
        es_object: Elasticsearch client; only ``search`` is used.
        index_name: Index to query.
        search: Query body passed through unchanged. The parameter shadows
            this function's own name inside the body; it is kept for
            backward compatibility with keyword callers.

    Returns:
        Whatever ``es_object.search`` returns.
    """
    return es_object.search(index=index_name, body=search)
|
||
|
||
# read songs data
# NOTE(review): absolute, machine-specific path - the script only runs where
# this exact file exists.
with codecs.open('/home/thisara/Documents/sem 7/Data Mining/ir_project_160684E/scraped_songs.json', 'r', encoding='utf-8') as json_file:
    text = json_file.read()
data = json.loads(text)

# create index
es = connect_elasticsearch()
create_index(es, 'songs')

# add songs: every element of the parsed JSON is indexed as one document
for song in data:
    store_record(es, 'songs', song)
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from flask import Flask, render_template, request | ||
from elasticsearch import Elasticsearch | ||
import search | ||
import ast | ||
|
||
# NOTE(review): all three paths are absolute and machine-specific.

# Stop words, one per line. The slice drops the first entry and the last two
# entries of the split. NOTE(review): presumably a header line and trailing
# blank lines - confirm against the file.
with open('/home/thisara/Documents/sem 7/Data Mining/ir_project_160684E/src_copus/stopwords.txt', 'r', encoding='utf-8') as f:
    text = f.read()
stopwords = text.split('\n')[1:-2]

# copus_words.txt holds a Python literal; literal_eval parses it without
# executing code.
with open('/home/thisara/Documents/sem 7/Data Mining/ir_project_160684E/copus_words.txt', 'r', encoding='utf-8') as f:
    text = f.read()
coprus = ast.literal_eval(text)

# similarity.txt holds a Python literal as well.
with open('/home/thisara/Documents/sem 7/Data Mining/ir_project_160684E/similarity.txt', 'r', encoding='utf-8') as f:
    sim = f.read()
sim_letter = ast.literal_eval(sim)

# From here on `coprus` is the bundle handed to search.search():
# [parsed copus_words.txt, parsed similarity.txt, stop-word list].
coprus = [coprus, sim_letter, stopwords]
|
||
|
||
app = Flask(__name__)
# Host and port are given explicitly through the hosts list. `localhost=` is
# not a client option, so the node address must not be passed that way.
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
|
||
|
||
@app.route('/')
def home():
    """Render the ``search.html`` template for the site root."""
    return render_template('search.html')
|
||
|
||
@app.route('/search/results', methods=['GET', 'POST'])
def search_request():
    """Run a search for the submitted term and filters and render the results.

    Reads ``input`` (search term) plus the multi-valued ``genre`` and
    ``popularity`` fields, passes them to ``search.search`` and renders
    ``results.html`` with ``res = [spell_correction, hits_number, rows]``,
    where each row is
    ``[title, singer, composer, music, lyrics, beat, genre, ratings]``.

    A request without ``input`` is answered with HTTP 400.
    """
    # request.values merges query-string and form data, so the GET method
    # advertised by the route works as well as POST.
    params = request.values
    search_term = params["input"]
    applied_filter = {
        'genre': params.getlist('genre'),
        'ratings': params.getlist('popularity'),
    }

    # retrieve data
    results = search.search(es, search_term, applied_filter, coprus)

    # Each entry of results['results'] is indexed with [1] to reach the song
    # fields; the column order below is the order the template reads them in.
    columns = ('title', 'singer', 'composer', 'music',
               'lyrics', 'beat', 'genre', 'ratings')
    res_represent = [
        [hit[1][column] for column in columns]
        for hit in results['results']
    ]
    represent = [results['spell_correction'], results['hits_number'], res_represent]

    return render_template('results.html', res=represent)
|
||
|
||
if __name__ == '__main__':
    # 0.0.0.0 binds every network interface, so the server is reachable
    # from other machines, not only from localhost.
    app.run(host='0.0.0.0', port=5000)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
<!DOCTYPE html>
<html lang="si">
<head>
    {# Results page. Rendered with `res`, a three-item list:
         res[0] - spell-correction text
         res[1] - number of hits
         res[2] - rows of [title, singer, composer, music, lyrics, beat, genre, ratings] #}
    <meta charset="utf-8">
    <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Pacifico">
    <link rel="icon" href="http://obj-cache.cloud.ruanbekker.com/favicon.ico">
    <link href="//netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap.min.css" rel="stylesheet">

    <title>Songs Search</title>

    <style>
        * {
            box-sizing: border-box;
        }

        /* Create two unequal columns that float next to each other */
        .column {
            float: left;
            padding: 10px;
            height: 1500px;
            /* Should be removed. Only for demonstration */
        }

        /* Filter sidebar: dark background with light text */
        .left {
            width: 20%;
            background-color: #154360;
            color: #FDFEFE;
        }

        .right {
            width: 80%;
        }

        /* Clear floats after the columns */
        .row:after {
            content: "";
            display: table;
            clear: both;
        }
    </style>
</head>

<body>

{# One form wraps both columns so the filter checkboxes (left) and the search
   box (right) are submitted together without the form tag straddling two
   sibling divs. #}
<form action="/search/results" method="post">

    <div class="column left">
        <center>
            <font size="6">වැඬිදුර සොයන්න ...</font>
        </center>

        <p><font size="4">කාණ්ඬය</font></p>
        <p>
            <input type="checkbox" name="genre" value="වර්තමාන"> වර්තමාන ගීත<br>
            <input type="checkbox" name="genre" value="සම්භාව්ය"> සම්භාව්ය ගීත<br>
            <input type="checkbox" name="genre" value="පැරණි"> පැරණි ගීත<br>
            <input type="checkbox" name="genre" value="පොප්"> පොප් ගීත<br>
            <input type="checkbox" name="genre" value="ස්වර්ණ"> ස්වර්ණ ගීත<br>
            <input type="checkbox" name="genre" value="යුගල"> යුගල ගීත<br>
            <input type="checkbox" name="genre" value="දිරිගන්වන"> දිරිගන්වන ගීත<br>
            <input type="checkbox" name="genre" value="චිත්රපට"> චිත්රපට ගීත<br>
            <input type="checkbox" name="genre" value="කැලිප්සෝ"> කැලිප්සෝ ගීත<br>
            <input type="checkbox" name="genre" value="ළමා"> ළමා ගීත
        </p>

        <p><font size="4">ජනප්රියත්වය</font></p>
        <p>
            <input type="checkbox" name="popularity" value="ඉතා අඩු"> ඉතා අඩු<br>
            <input type="checkbox" name="popularity" value="අඩු"> අඩු<br>
            <input type="checkbox" name="popularity" value="සාමාන්ය"> සාමාන්ය<br>
            <input type="checkbox" name="popularity" value="ඉහළ"> ඉහළ<br>
            <input type="checkbox" name="popularity" value="ඉතා ඉහළ"> ඉතා ඉහළ<br>
        </p>
    </div>

    <div class="column right">
        <div class="container">
            <div style="background:transparent !important" class="jumbotron">
                <div style="font-family: 'Pacifico', cursive;">
                    <center>
                        <font size="8">ඔබේ ප්රියතම ගීත සොයන්න ....</font>
                    </center>
                </div>
            </div>

            <div class="input-group">
                <input type="text" class="form-control input-lg" name="input" placeholder="සොයන්න" autofocus>
                <div class="input-group-btn">
                    <button class="btn btn-primary btn-lg" type="submit">
                        <i class="glyphicon glyphicon-search"></i>
                    </button>
                </div>
            </div>

            {# hit count #}
            <center>
                <font size="3">ගැලපීම්: {{ res[1] }} </font>
            </center>

            {# spell-correction text #}
            <center>
                <font size="3"> {{ res[0] }} </font>
            </center>

            <table class="table">
                <thead>
                    <tr>
                        <th>ගීතය</th>
                        <th>ගායකයා</th>
                        <th>පද රචකයා</th>
                        <th>තනු නිර්මාණය</th>
                        <th>පද රවනය</th>
                        <th>තාලය</th>
                        <th>කාණ්ඬය</th>
                        <th>ජනප්රියත්වය</th>
                    </tr>
                </thead>
                <tbody>
                    {% for hit in res[2] %}
                    <tr>
                        {# hit[0] is the song title, not a URL, so it is shown as plain text #}
                        <th scope="row">{{ hit[0] }}</th>
                        <td>{{ hit[1] }}</td>
                        <td>{{ hit[2] }}</td>
                        <td>{{ hit[3] }}</td>
                        <td>{{ hit[4] }}</td>
                        <td>{{ hit[5] }}</td>
                        <td>{{ hit[6] }}</td>
                        <td>{{ hit[7] }}</td>
                    </tr>
                    {% endfor %}
                </tbody>
            </table>
        </div>
    </div>

</form>

</body>
</html>
|
Oops, something went wrong.