Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
thisarawelmilla committed Jul 3, 2020
1 parent d609377 commit 4acbfd4
Show file tree
Hide file tree
Showing 24 changed files with 978 additions and 0 deletions.
Binary file added img/facets_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added img/interface.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added img/spell_correction_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added img/system_architecture.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added img/text_classification_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
109 changes: 109 additions & 0 deletions indexing/indexing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from elasticsearch import Elasticsearch
import logging
import json
import codecs

def connect_elasticsearch():
    """Build a client for the Elasticsearch node on localhost:9200.

    Pings the node once and prints ``'Connected'`` or ``'failed to connect'``
    accordingly. The client object is returned in both cases, so callers
    that need a hard failure must check reachability themselves.
    """
    client = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    status = 'Connected' if client.ping() else 'failed to connect'
    print(status)
    return client

def create_index(es_object, index_name):
    """Create ``index_name`` with the song mappings if it does not exist yet.

    Args:
        es_object: Elasticsearch client (anything exposing
            ``indices.exists`` and ``indices.create``).
        index_name: Name of the index to create.

    Returns:
        True when the index was created by this call; False when it already
        existed or when the client raised an error (the error is printed).
    """
    # Single-shard, no-replica layout: sized for a one-node local setup.
    # NOTE(review): the fields are declared under a "songs" object; confirm
    # the indexed documents are nested that way, otherwise these mappings do
    # not apply to top-level document fields.
    settings = {
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        },
        "mappings": {
            "properties": {
                "songs": {
                    "properties": {
                        "title": {"type": "keyword"},
                        "singer": {"type": "keyword", "store": True},
                        "composer": {"type": "keyword", "store": True},
                        "music": {"type": "keyword", "store": True},
                        "lyrics": {"type": "text"},
                        "beat": {"type": "keyword", "store": True},
                        "key": {"type": "keyword", "store": True},
                        # Was misspelled "gerne"; the web interface reads and
                        # filters on "genre".
                        "genre": {"type": "text", "store": True},
                        "ratings": {"type": "float", "store": True},
                    }
                }
            }
        },
    }
    try:
        # Keyword arguments only: newer client releases reject positional use.
        if es_object.indices.exists(index=index_name):
            return False
        es_object.indices.create(index=index_name, body=settings)
    except Exception as ex:
        # Best-effort: report the failure and tell the caller nothing was
        # created. Returning from `finally` (as before) would also have
        # swallowed KeyboardInterrupt / SystemExit.
        print(str(ex))
        return False
    return True


def store_record(elastic_object, index_name, record):
    """Index one document, reporting (not raising) any failure.

    Args:
        elastic_object: Client exposing ``index(index=..., body=...)``.
        index_name: Target index.
        record: Document body to store.

    Returns:
        None. On error, two lines are printed: a fixed notice followed by the
        exception text.
    """
    try:
        elastic_object.index(index=index_name, body=record)
    except Exception as err:
        for line in ('Error in indexing data', str(err)):
            print(line)


def search(es_object, index_name, search):
    """Run the query body ``search`` against ``index_name``.

    Returns whatever the client's ``search`` call returns, unmodified.
    """
    return es_object.search(index=index_name, body=search)


# Load the scraped songs; the file is expected to hold a JSON document that
# can be iterated song by song.
songs_path = '/home/thisara/Documents/sem 7/Data Mining/ir_project_160684E/scraped_songs.json'
with codecs.open(songs_path, 'r', encoding='utf-8') as json_file:
    text = json_file.read()
data = json.loads(text)

# Make sure the target index exists before any document is written.
songs_index = 'songs'
es = connect_elasticsearch()
create_index(es, songs_index)

# Store every song as its own document.
for song in data:
    store_record(es, songs_index, song)









52 changes: 52 additions & 0 deletions interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
from flask import Flask, render_template, request
from elasticsearch import Elasticsearch
import search
import ast

# Stop-word list: one entry per line; the first line and the last two
# split elements are discarded.
with open('/home/thisara/Documents/sem 7/Data Mining/ir_project_160684E/src_copus/stopwords.txt', 'r', encoding='utf-8') as f:
    text = f.read()
stopwords = text.split('\n')[1:-2]

# Corpus vocabulary, stored on disk as a Python literal.
with open('/home/thisara/Documents/sem 7/Data Mining/ir_project_160684E/copus_words.txt', 'r', encoding='utf-8') as f:
    text = f.read()
corpus_words = ast.literal_eval(text)

# Letter-similarity data, also stored as a Python literal.
with open('/home/thisara/Documents/sem 7/Data Mining/ir_project_160684E/similarity.txt', 'r', encoding='utf-8') as f:
    sim = f.read()
sim_letter = ast.literal_eval(sim)

# Bundle handed to search.search(): [vocabulary, similarity data, stop words].
coprus = [corpus_words, sim_letter, stopwords]


# Flask application object used by the route decorators below.
app = Flask(__name__)
# Client for the local Elasticsearch node. The previous call passed
# `localhost='localhost'`, which is not a client option; the node address is
# supplied through `hosts`, in the same form indexing.py uses.
es = Elasticsearch([{'host': 'localhost', 'port': 9200}])


@app.route('/')
def home():
    """Serve the landing page with the empty search form."""
    page = render_template('search.html')
    return page


@app.route('/search/results', methods=['GET', 'POST'])
def search_request():
    """Run a song search and render the results page.

    Reads the query text (``input``) and the optional ``genre`` /
    ``popularity`` filter selections from the request, delegates to
    ``search.search`` and reshapes the hits for ``results.html``.

    A request without an ``input`` parameter is rejected by Flask with a
    400 response, as before.

    Returns:
        The rendered ``results.html`` page; ``res`` is
        ``[spell_correction, hits_number, rows]`` where each row lists
        title, singer, composer, music, lyrics, beat, genre and ratings.
    """
    # request.values merges the query string with the form body, so the GET
    # method this route advertises works too; request.form alone is always
    # empty on GET and made every GET request fail.
    params = request.values
    search_term = params["input"]
    applied_filter = {
        'genre': params.getlist('genre'),
        'ratings': params.getlist('popularity'),
    }

    # retrieve data
    results = search.search(es, search_term, applied_filter, coprus)

    # Each entry of results['results'] carries the song document at index 1;
    # flatten it into the column order the template expects.
    columns = ('title', 'singer', 'composer', 'music',
               'lyrics', 'beat', 'genre', 'ratings')
    res_represent = [
        [hit[1][column] for column in columns]
        for hit in results['results']
    ]
    represent = [results['spell_correction'], results['hits_number'], res_represent]

    return render_template('results.html', res=represent)


if __name__ == '__main__':
    # Start Flask's built-in server on every network interface, port 5000.
    app.run(port=5000, host='0.0.0.0')
162 changes: 162 additions & 0 deletions interface/results.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
<!DOCTYPE html>
{# The page text is Sinhala, so the document language is declared as "si". #}
<html lang="si">
<head>

  <meta charset="utf-8">

  {# Page-level layout rules. They previously sat outside <head>; they stay
     ahead of the Bootstrap link so the cascade order is unchanged. #}
  <style>
    * {
      box-sizing: border-box;
    }

    /* Create two unequal columns that floats next to each other */
    .column {
      float: left;
      padding: 10px;
      height: 1500px;
      /* Should be removed. Only for demonstration */
    }

    .left {
      width: 20%;
    }

    .right {
      width: 80%;
    }

    /* Clear floats after the columns */
    .row:after {
      content: "";
      display: table;
      clear: both;
    }
  </style>

  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Pacifico">
  <link rel="icon" href="http://obj-cache.cloud.ruanbekker.com/favicon.ico">
  <link href="//netdna.bootstrapcdn.com/bootstrap/3.0.0/css/bootstrap.min.css" rel="stylesheet">

  <title>Bookmarks Search</title>

</head>


<body>

  {# Left column: filter check boxes. The form is now opened AND closed inside
     this column; the search box and submit button in the right column join it
     through their form="search-form" attribute instead of relying on the
     browser's recovery from a mis-nested </form>. #}
  <div class="column left" style="background-color:#154360">
    <style type="text/css">
      p {
        color:#FDFEFE;
      }
      form {
        color:#FDFEFE;
      }
    </style>
    <p>
    <center>
      <font size="6" style="color:#FDFEFE">වැඬිදුර සොයන්න ...</font>
    </center>
    </p>
    <form id="search-form" action="/search/results" method="post">


      <p><font size="4">කාණ්ඬය</font></p>
      <p>

        <input type="checkbox" name="genre" value="වර්තමාන"> වර්තමාන ගීත<br>
        <input type="checkbox" name="genre" value="සම්භාව්‍ය"> සම්භාව්‍ය ගීත<br>
        <input type="checkbox" name="genre" value="පැරණි"> පැරණි ගීත<br>
        <input type="checkbox" name="genre" value="පොප්"> පොප් ගීත<br>
        <input type="checkbox" name="genre" value="ස්වර්ණ"> ස්වර්ණ ගීත<br>
        <input type="checkbox" name="genre" value="යුගල"> යුගල ගීත<br>
        <input type="checkbox" name="genre" value="දිරිගන්වන"> දිරිගන්වන ගීත<br>
        <input type="checkbox" name="genre" value="චිත්‍රපට"> චිත්‍රපට ගීත<br>
        <input type="checkbox" name="genre" value="කැලිප්සෝ"> කැලිප්සෝ ගීත<br>
        <input type="checkbox" name="genre" value="ළමා"> ළමා ගීත

      </p>

      <p>

        <font size="4">ජනප්‍රියත්වය</font></p>
      <p>
        <input type="checkbox" name="popularity" value="ඉතා අඩු"> ඉතා අඩු<br>
        <input type="checkbox" name="popularity" value="අඩු"> අඩු<br>
        <input type="checkbox" name="popularity" value="සාමාන්‍ය"> සාමාන්‍ය<br>
        <input type="checkbox" name="popularity" value="ඉහළ"> ඉහළ<br>
        <input type="checkbox" name="popularity" value="ඉතා ඉහළ"> ඉතා ඉහළ<br>

      </p>

    </form>
  </div>

  {# Right column: search box, hit count, spelling suggestion and results. #}
  <div class="column right" >
    <div class="container">
      <div style="background:transparent !important" class="jumbotron">
        <div style="font-family: 'Pacifico', cursive;">
          <p>
          <center>
            <font size="8">ඔබේ ප්‍රියතම ගීත සොයන්න ....</font>
          </center>
          </p>
        </div>
      </div>

      <div class="input-group">
        <input type="text" class="form-control input-lg" name="input" placeholder="සොයන්න" form="search-form" autofocus>
        <div class="input-group-btn">
          <button class="btn btn-primary btn-lg" type="submit" form="search-form">
            <i class="glyphicon glyphicon-search"></i>
          </button>
        </div>
      </div>

      {# res = [spell_correction, hits_number, rows] as built by the view. #}
      <center>
        <font size="3">ගැලපීම්: {{ res[1] }} </font>
      </center>

      <center>
        <font size="3"> {{ res[0] }} </font>
      </center>

      <table class="table">
        <thead>
          <tr>
            <th>ගීතය</th>
            <th>ගායකයා</th>
            <th>පද රචකයා</th>
            <th>තනු නිර්මාණය</th>
            <th>පද රවනය</th>
            <th>තාලය</th>
            <th>කාණ්ඬය</th>
            <th>ජනප්‍රියත්වය</th>
          </tr>
        </thead>

        {# One row per hit; columns follow the order assembled in the view:
           title, singer, composer, music, lyrics, beat, genre, ratings.
           NOTE(review): the link target is the song title itself - confirm
           that a route exists for it. #}
        {% for hit in res[2] %}
        <tbody>
          <tr>
            <th scope="row"><a href="{{ hit[0] }}">{{ hit[0] }}</a></th>
            <td>{{ hit[1] }}</td>
            <td>{{ hit[2] }}</td>
            <td>{{ hit[3] }}</td>
            <td>{{ hit[4] }}</td>
            <td>{{ hit[5] }}</td>
            <td>{{ hit[6] }}</td>
            <td>{{ hit[7] }}</td>
          </tr>
        </tbody>
        {% endfor %}
      </table>


    </div>
  </div>


</body>
</html>

Loading

0 comments on commit 4acbfd4

Please sign in to comment.