In [1]:
from pymilvus import MilvusClient, FieldSchema, CollectionSchema, DataType, Collection, utility, connections
from sentence_transformers import SentenceTransformer
import json
import os
import pandas as pd
from tqdm.autonotebook import tqdm, trange

  from tqdm.autonotebook import tqdm, trange


In [7]:
client = MilvusClient("milvus_demo.db")

In [4]:
model = SentenceTransformer('all-MiniLM-L6-v2')



In [3]:
# pip install "pymilvus[model]"

In [11]:
# Load resumes from the dataset.json file
with open("data.json", "r") as file:
    resumes = json.load(file)

# Prepare the data in the required format
data = []

for i, resume in enumerate(resumes):
    # Combine resume fields to create the full text
    professional_summary = resume.get("professional_summary", "")
    skills = resume.get("skills", "")
    work_history = resume.get("work_history", "")
    education = resume.get("education", "")
    text = professional_summary + " " + skills + " " + work_history + " " + education
    
    # Generate embedding for the full text
    embedding = model.encode(text)  # Single document at a time
    
    # Each entity contains id, vector, and metadata (without file_name)
    entity = {
        "id": i,
        "vector": embedding,  # Embedding for the resume text
        "text": text,  # Full text of the resume
        "profession": resume["profession"],  # Profession metadata
    }
    
    data.append(entity)

# Now, 'data' contains the resumes with ids, vectors, and metadata (without file_name)
print(f"Data has {len(data)} entities, each with fields: {data[0].keys()}")
print(f"Vector dim: {len(data[0]['vector'])}")


Data has 2484 entities, each with fields: dict_keys(['id', 'vector', 'text', 'profession'])
Vector dim: 384


In [12]:
if client.has_collection(collection_name="demo_collection"):
    client.drop_collection(collection_name="demo_collection")
client.create_collection(
    collection_name="demo_collection",
    dimension=384,  # The vectors we will use in this demo has 768 dimensions
)

In [13]:
res = client.insert(collection_name="demo_collection", data=data)

print(res)

{'insert_count': 2484, 'ids': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215,

In [8]:
print(f"Total entities in the collection: {res['insert_count']}")

Total entities in the collection: 2484


In [24]:
question = "any candiate with experience in python and machine learning"

In [25]:
query_vectors = model.encode([question])
# If you don't have the embedding function you can use a fake vector to finish the demo:
# query_vectors = [ [ random.uniform(-1, 1) for _ in range(768) ] ]

search_res = client.search(
    collection_name="demo_collection",
    data=query_vectors,  # Use the `emb_text` function to convert the question to an embedding vector
    limit=3,  # Return top 3 results
  # Inner product distance
    output_fields=["text"],  # Return the text field
)

print(search_res)

data: ['[{\'id\': 2431, \'distance\': 0.47173744440078735, \'entity\': {\'text\': "Professional Summary\\nEnthusiastic computer engineer eager to contribute to team success through hard work, attention to detail and excellent organizational skills.\\nTechnical professional with complete understanding of entire software development life cycle. Respectful self-motivator gifted at finding reliable\\nsolutions for software issues. Experienced in c#, python, HTML, SQL, node.js/javascript and working knowledge of Restful API design &\\nimplementations. Fluent in English and Turkish and accustomed to working with cross-cultural, global teams. Skills\\nC#, HTML, CSS, JavaScript, 5 years of experience\\nSQL, 5 years of experience\\nPython, MatLab, MongoDB, Tableau, Node JS\\nFrameworks: .Net, Devexpress, TensorFlow, Keras, Scikit-learn, Pandas, NLTK.\\nSearch Engine Optimization\\nNet\\nAPI\\nCSS\\nClients\\nDatabase development\\nDesigning\\nEnglish\\nHTML\\nImage processing\\nJavaScript\\nLea

In [26]:

retrieved_lines_with_distances = [(res["entity"]["text"], res["distance"]) for res in search_res[0]]

In [27]:
print(json.dumps(retrieved_lines_with_distances, indent=4))

[
    [
        "Professional Summary\nEnthusiastic computer engineer eager to contribute to team success through hard work, attention to detail and excellent organizational skills.\nTechnical professional with complete understanding of entire software development life cycle. Respectful self-motivator gifted at finding reliable\nsolutions for software issues. Experienced in c#, python, HTML, SQL, node.js/javascript and working knowledge of Restful API design &\nimplementations. Fluent in English and Turkish and accustomed to working with cross-cultural, global teams. Skills\nC#, HTML, CSS, JavaScript, 5 years of experience\nSQL, 5 years of experience\nPython, MatLab, MongoDB, Tableau, Node JS\nFrameworks: .Net, Devexpress, TensorFlow, Keras, Scikit-learn, Pandas, NLTK.\nSearch Engine Optimization\nNet\nAPI\nCSS\nClients\nDatabase development\nDesigning\nEnglish\nHTML\nImage processing\nJavaScript\nLeadership\nMarketing\nMatLab\nC#\nOffice\nWindows\nProject management\nSpeaker\nPython\n

In [28]:
context = "\n".join([line_with_distance[0] for line_with_distance in retrieved_lines_with_distances])

PROMPT = """
Use the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags.
<context>
{context}
</context>
<question>
{question}
</question>
"""

In [19]:
from huggingface_hub import InferenceClient

repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

llm_client = InferenceClient(model=repo_id, timeout=180)

In [30]:
prompt = PROMPT.format(context=context, question=question)

In [31]:
answer = llm_client.text_generation(
    prompt,
    max_new_tokens=10000,
).strip()

print(answer)

Yes, there is a candidate with experience in Python and machine learning. According to the provided context, the candidate has experience in Python, MatLab, and machine learning frameworks such as TensorFlow, Keras, Scikit-learn, and NLTK. They have also worked on projects involving machine learning, such as the Question Answering System project, which used BERT and ELMO language models, and the Pre-Assess Melanoma project, which used image processing algorithms.


In [2]:
from milvus_model.hybrid import BGEM3EmbeddingFunction

ef = BGEM3EmbeddingFunction(use_fp16=False, device="cuda")
dense_dim = ef.dim["dense"]

Fetching 30 files:   0%|          | 0/30 [00:00<?, ?it/s]

  colbert_state_dict = torch.load(os.path.join(model_dir, 'colbert_linear.pt'), map_location='cpu')
  sparse_state_dict = torch.load(os.path.join(model_dir, 'sparse_linear.pt'), map_location='cpu')


In [80]:
e = ef(["shit"])

In [95]:
e['sparse'][0]

<1x250002 sparse matrix of type '<class 'numpy.float64'>'
	with 1 stored elements in Compressed Sparse Row format>

In [3]:
# Load resumes from the dataset.json file
with open("data.json", "r") as file:
    resumes = json.load(file)

# Prepare the data in the required format
data = []

for i, resume in tqdm(enumerate(resumes)):
    # Combine resume fields to create the full text
    professional_summary = resume.get("professional_summary", "")
    skills = resume.get("skills", "")
    work_history = resume.get("work_history", "")
    education = resume.get("education", "")
    text = professional_summary + " " + skills + " " + work_history + " " + education
    
    # Generate embedding for the full text
    embedding = ef([text])
    sparse_vector = embedding["sparse"][0]
    dense_vector = embedding["dense"][0]
    
    # Each entity contains id, vector, and metadata (without file_name)
    entity = {
        "id": i,
        "dense": dense_vector,  # Embedding for the resume text
        "sparse":sparse_vector,
        "text": text,  # Full text of the resume
        "profession": resume["profession"],  # Profession metadata
        "file_name":resume["file_name"]
    }
    
    data.append(entity)

# Now, 'data' contains the resumes with ids, vectors, and metadata (without file_name)
print(f"Data has {len(data)} entities, each with fields: {data[0].keys()}")
print(f"Vector dim: {len(data[0]['dense'])}")


0it [00:00, ?it/s]

Data has 2484 entities, each with fields: dict_keys(['id', 'dense', 'sparse', 'text', 'profession', 'file_name'])
Vector dim: 1024


In [15]:
len(data[0]["dense"][0])

1024

In [16]:
print(dense_dim)

1024


In [3]:
client = MilvusClient("milvus.db")

In [7]:
fields = [
    # Use auto generated id as primary key
    FieldSchema(
        name="id", dtype=DataType.INT64, is_primary=True, max_length=100
    ),
    FieldSchema(name="sparse", dtype=DataType.SPARSE_FLOAT_VECTOR),
    FieldSchema(name="dense", dtype=DataType.FLOAT_VECTOR, dim=dense_dim),
    # Store the original text to retrieve based on semantically distance
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=60000),
    FieldSchema(name="profession", dtype=DataType.VARCHAR, max_length=256),
    FieldSchema(name="file_name", dtype=DataType.VARCHAR, max_length=256),
    # Milvus now supports both sparse and dense vectors,
    # we can store each in a separate field to conduct hybrid search on both vectors
    
]
schema = CollectionSchema(fields)



In [None]:
col_name = "hybrid_demo"
if client.has_collection(collection_name=col_name):
    client.drop_collection(collection_name=col_name)
client.create_collection(
    collection_name=col_name,
    dimension=dense_dim,
    schema=schema,
 
)

In [6]:
index_params = client.prepare_index_params()
index_params.add_index(
    field_name="sparse",
    index_name="sparse_inverted_index",
    index_type="SPARSE_INVERTED_INDEX",
    metric_type="IP"
)

index_params.add_index(
    field_name="dense",
    index_name="dense",
    index_type="AUTOINDEX",
    metric_type="IP"
)
client.create_index(collection_name=col_name, index_params=index_params)

In [7]:
data

[{'id': 0,
  'dense': array([-0.05966879, -0.01730734, -0.04933164, ...,  0.03529824,
          0.02419243,  0.00179837], dtype=float32),
  'sparse': <1x250002 sparse matrix of type '<class 'numpy.float64'>'
  	with 301 stored elements in Compressed Sparse Row format>,
  'text': " Skills\nBusiness Management, conversion, Client, Customer Satisfaction, Customer Services, customer service experience, Direct Sales, direction,\nExecutive Management, focus, forms, hiring, languages, Director, marketing, meetings, works, Enterprise, Network, performance management,Quality, , real time, recruiting, Sales, Sales Analysis, Spanish, Strateg Experience\nDirector of Operations, BPO\n \n11/2013\n \nto \n04/2015\n \nCompany Name\n \nCity\n \n, \nState\nResponsible for 3 Sales Locations (Over 200 employees) managed team of 200 professional that consisted of hiring, training, and\nperformance management.\nStrengthened the client relationship between the client and Concentrix.\nIncreased Client revenue

In [40]:
len(data)

2484

In [8]:
res = client.insert(collection_name=col_name, data=data)

print(f"Total entities inserted: {res['insert_count']}")

Total entities inserted: 2484


In [9]:
# Enter your search query
query = "give me a candidate with experience in python and machine learning with projects in computer vision"


# Generate embeddings for the query
query_embeddings = ef([query])
# print(query_embeddings)

In [8]:
col_name = "hybrid_demo"
# if utility.has_collection(col_name):
#     Collection(col_name).drop()
connections.connect(uri="./milvus.db")
col = Collection(col_name, schema, consistency_level="Strong")

In [10]:
from pymilvus import (
    AnnSearchRequest,
    WeightedRanker,
)


def dense_search(col, query_dense_embedding, limit=10):
    search_params = {"metric_type": "IP", "params": {}}
    res = col.search(
        [query_dense_embedding],
        anns_field="dense",
        limit=limit,
        output_fields=["text","file_name"],
        param=search_params,
    )[0]
    return [(hit.get("text"),hit.get("file_name")) for hit in res]


def sparse_search(col, query_sparse_embedding, limit=10):
    search_params = {
        "metric_type": "IP",
        "params": {},
    }
    res = col.search(
        [query_sparse_embedding],
        anns_field="sparse",
        limit=limit,
        output_fields=["text"],
        param=search_params,
    )[0]
    return [(hit.get("text"),hit.get("file_name")) for hit in res]


def hybrid_search(
    col,
    query_dense_embedding,
    query_sparse_embedding,
    sparse_weight=1.0,
    dense_weight=1.0,
    limit=10,
):
    dense_search_params = {"metric_type": "IP", "params": {}}
    dense_req = AnnSearchRequest(
        [query_dense_embedding], "dense", dense_search_params, limit=limit
    )
    sparse_search_params = {"metric_type": "IP", "params": {}}
    sparse_req = AnnSearchRequest(
        [query_sparse_embedding], "sparse", sparse_search_params, limit=limit
    )
    rerank = WeightedRanker(sparse_weight, dense_weight)
    res = col.hybrid_search(
        [sparse_req, dense_req], rerank=rerank, limit=limit, output_fields=["text","file_name"]
    )[0]
    return [(hit.get("text"),hit.get("file_name")) for hit in res]

In [11]:
dense_results = dense_search(col, query_embeddings["dense"][0], limit=3)
sparse_results = sparse_search(col, query_embeddings["sparse"][[0]], limit=3)
hybrid_results = hybrid_search(
    col,
    query_embeddings["dense"][0],
    query_embeddings["sparse"][[0]],
    sparse_weight=0.7,
    dense_weight=1.0,
    limit=3,
)

In [14]:
dense_results[0][0]

' Skills\nSQL Server/MySQL\nData Analysis and Visualization\nC#/.NET framework\nPython\nJavaScript\nAzure DevOps\nEducation\nMaster of Science\n \nComputer Science\n \nUniversity of Illinois At Springfield\n \n, \nCity\n \n, \nState\nCompleted coursework in Data Science, C# Programming and .NET and Data Mining.\n3.95/4.0 GPA\nMay 2016\nBachelor of Engineering\n \nInformation Science\n \nP.A College of Engineering (VTU)\n \n, \nCity\n"Best Outgoing Student" Recipient\nHonor Roll 2010-2011\nMay 201 Experience\nPrincipal Consultant\n \nCompany Name\n \n| \nCity\n \n, \nState\n \n| \nJune 2018\n \n- \nCurrent\nDesign, Develop, Test and Document Business Intelligence reports:\nGather Business needs to develop interactive dashboards and BI reports.\nWork with Data warehouse and other data sources to get data for Data Visualization Using Business Intelligence Tool.\nWrite SQL Queries and Stored procedures to pull data to Visualization Tool.\nModeling the data to efficiently pull the data for 

In [15]:
retrieved = [[res[0], res[1]] for res in hybrid_results]
retrieved

[[' Skills\nSQL Server/MySQL\nData Analysis and Visualization\nC#/.NET framework\nPython\nJavaScript\nAzure DevOps\nEducation\nMaster of Science\n \nComputer Science\n \nUniversity of Illinois At Springfield\n \n, \nCity\n \n, \nState\nCompleted coursework in Data Science, C# Programming and .NET and Data Mining.\n3.95/4.0 GPA\nMay 2016\nBachelor of Engineering\n \nInformation Science\n \nP.A College of Engineering (VTU)\n \n, \nCity\n"Best Outgoing Student" Recipient\nHonor Roll 2010-2011\nMay 201 Experience\nPrincipal Consultant\n \nCompany Name\n \n| \nCity\n \n, \nState\n \n| \nJune 2018\n \n- \nCurrent\nDesign, Develop, Test and Document Business Intelligence reports:\nGather Business needs to develop interactive dashboards and BI reports.\nWork with Data warehouse and other data sources to get data for Data Visualization Using Business Intelligence Tool.\nWrite SQL Queries and Stored procedures to pull data to Visualization Tool.\nModeling the data to efficiently pull the data fo

In [16]:
context = "\n".join([line_with_distance[0] for line_with_distance in retrieved])

In [17]:
context

' Skills\nSQL Server/MySQL\nData Analysis and Visualization\nC#/.NET framework\nPython\nJavaScript\nAzure DevOps\nEducation\nMaster of Science\n \nComputer Science\n \nUniversity of Illinois At Springfield\n \n, \nCity\n \n, \nState\nCompleted coursework in Data Science, C# Programming and .NET and Data Mining.\n3.95/4.0 GPA\nMay 2016\nBachelor of Engineering\n \nInformation Science\n \nP.A College of Engineering (VTU)\n \n, \nCity\n"Best Outgoing Student" Recipient\nHonor Roll 2010-2011\nMay 201 Experience\nPrincipal Consultant\n \nCompany Name\n \n| \nCity\n \n, \nState\n \n| \nJune 2018\n \n- \nCurrent\nDesign, Develop, Test and Document Business Intelligence reports:\nGather Business needs to develop interactive dashboards and BI reports.\nWork with Data warehouse and other data sources to get data for Data Visualization Using Business Intelligence Tool.\nWrite SQL Queries and Stored procedures to pull data to Visualization Tool.\nModeling the data to efficiently pull the data for 

In [18]:
PROMPT = """
Use the following pieces of information enclosed in <context> tags to provide an answer to the question enclosed in <question> tags.
<context>
{context}
</context>
<question>
{question}
</question>
"""

In [21]:
prompt = PROMPT.format(context=retrieved, question=query)
answer = llm_client.text_generation(
    prompt,
    max_new_tokens=1000,
).strip()
print(answer)

The candidate with experience in Python and machine learning, and projects in computer vision is the first candidate provided. They have completed a project predicting breast cancer using Python and the Random Forest Classifier for machine learning. They have also completed a project in image classification, constructing training and test data for given image data using Java programming and constructing five different classifier models using each training data file. They achieved 95% accuracy in this project.


In [65]:
def doc_text_formatting(ef, query, docs):
    tokenizer = ef.model.tokenizer
    query_tokens_ids = tokenizer.encode(query, return_offsets_mapping=True)
    query_tokens = tokenizer.convert_ids_to_tokens(query_tokens_ids)
    formatted_texts = []

    for doc in docs:
        ldx = 0
        landmarks = []
        encoding = tokenizer.encode_plus(doc, return_offsets_mapping=True)
        tokens = tokenizer.convert_ids_to_tokens(encoding["input_ids"])[1:-1]
        offsets = encoding["offset_mapping"][1:-1]
        for token, (start, end) in zip(tokens, offsets):
            if token in query_tokens:
                if len(landmarks) != 0 and start == landmarks[-1]:
                    landmarks[-1] = end
                else:
                    landmarks.append(start)
                    landmarks.append(end)
        close = False
        formatted_text = ""
        for i, c in enumerate(doc):
            if ldx == len(landmarks):
                pass
            elif i == landmarks[ldx]:
                if close:
                    formatted_text += "</span>"
                else:
                    formatted_text += "<span style='color:red'>"
                close = not close
                ldx = ldx + 1
            formatted_text += c
        if close is True:
            formatted_text += "</span>"
        formatted_texts.append(formatted_text)
    return formatted_texts

In [69]:
from IPython.display import Markdown, display

# Dense search results
display(Markdown("**Dense Search Results:**"))
formatted_results = doc_text_formatting(ef, query, dense_results)
for result in dense_results:
    display(Markdown(result[0]))

# Sparse search results
display(Markdown("\n**Sparse Search Results:**"))
formatted_results = doc_text_formatting(ef, query, sparse_results)
for result in formatted_results:
    display(Markdown(result[0]))

# Hybrid search results
display(Markdown("\n**Hybrid Search Results:**"))
formatted_results = doc_text_formatting(ef, query, hybrid_results)
for result in formatted_results:
    display(Markdown(result[0]))

**Dense Search Results:**

 Skills
SQL Server/MySQL
Data Analysis and Visualization
C#/.NET framework
Python
JavaScript
Azure DevOps
Education
Master of Science
 
Computer Science
 
University of Illinois At Springfield
 
, 
City
 
, 
State
Completed coursework in Data Science, C# Programming and .NET and Data Mining.
3.95/4.0 GPA
May 2016
Bachelor of Engineering
 
Information Science
 
P.A College of Engineering (VTU)
 
, 
City
"Best Outgoing Student" Recipient
Honor Roll 2010-2011
May 201 Experience
Principal Consultant
 
Company Name
 
| 
City
 
, 
State
 
| 
June 2018
 
- 
Current
Design, Develop, Test and Document Business Intelligence reports:
Gather Business needs to develop interactive dashboards and BI reports.
Work with Data warehouse and other data sources to get data for Data Visualization Using Business Intelligence Tool.
Write SQL Queries and Stored procedures to pull data to Visualization Tool.
Modeling the data to efficiently pull the data for visualization
Design Dashboard and Provide insights to management and departments to make Data driven decisions.
Embed BI reports to Customer Application using .NET Framework, JavaScript. Which uses Web APIs to access BI reports.
Create and Peer review SQL queries for ad hoc data reporting.
Managed junior developer by delivering consistent coaching and constructive feedback.
Design, Develop, Test and Document SQL Queries, Stored Procedures, .NET Codes in order to maintain and add features to Student
Information System.
Communicate with clients to gather requirement for new data collection or for change request.
Design a data model and web interface for collecting new data and improve existing system.
Peer review codes to validate correctness and integrity of the system.
Perform Unit test, Integration test and Regression test in Development and QA environment for Build Release.
Follow Agile Methodology for software development using Azure DevOps for better utilization of time and resources.
Create SQL queries for ad hoc data reporting.
Support end users resolving technical issues.
Business Intelligence Developer
 
Company Name
 
| 
City
 
, 
State
 
| 
September 2015
 
- 
May 2018
Designing, developing, validating, and documenting SQL queries, graphical reports, dashboards, and data warehouse applications that support the
strategic initiatives.
Reviewed project requirements to identify customer expectations and resources needed to meet goals.
Conceived, developed and implemented wide range of BI solutions.
Optimized data gathering processes, analysis procedures and visualization strategies.
Created and oversaw dashboards with QlikView.
Create and maintain SSIS packages using Microsoft Visual Studio. Extract the data from files received through SFTP and upload the data-
to-data warehouse in a required form.
Using given technical specs create flat files and send data files to clients using SSIS package. Also, automate maintenance of SQL Server
databases and updates.
Analyzed code and corrected errors to optimize output.
Resolved customer issues by establishing workarounds and solutions to debug and create defect fixes.
Wrote user manuals and other documentation for roll-out in customer training sessions.
Established and maintained key relationships with business stakeholders to promote future opportunities.
Managed 2 junior developers by delivering consistent coaching and constructive feedback.
College Student
 
Company Name
 
| 
City
 
, 
State
 
| 
January 2015
 
- 
May 2016
Most accomplished College Projects:
Predicting Breast Cancer:
Create machine learning model to predict malignant tumors.
Used Python - "Random Forest Classifier" to predict malignant tumors in breast tissue.
Here, main idea is building multiple models with different sample and different initial variables from train data set.
Goal is to determine what attributes provide the most information that can be used to predict malignancy.
My model was tested against â€œtest data setâ€​ for accuracy and it is 96% successful in predicting whether the tumor is malignant or
benign.
Data Mining- Image Classification:
It is the process to categorize images into one of several categories. Classification requires training for each image category.
Constructed Training and Test data for given image data using Java programming.
Constructed the five different classifier models using each training data file.Tested these classifiers using test data and compared the prediction accuracies among five different classifiers. Achieved 95% accuracy.
Software Engineer
 
Company Name
 
| 
City
 
, 
State
 
| 
October 2013
 
- 
January 2014
Develop code in java and document artifacts including unit test plans and ensure that the output is as per the specifications:
Execute tasks with both procedural and OOP development techniques.
Best practices for efficient and easier to maintain code.
Best practices for securing web applications.
Inserting, querying and managing data stored in databases or files.
Skills
SQL Server/MySQL
Data Analysis and Visualization
C#/.NET framework
Python
JavaScript
Azure DevOps Education
Master of Science
 
Computer Science
 
University of Illinois At Springfield
 
, 
City
 
, 
State
Completed coursework in Data Science, C# Programming and .NET and Data Mining.
3.95/4.0 GPA
May 2016
Bachelor of Engineering
 
Information Science
 
P.A College of Engineering (VTU)
 
, 
City
"Best Outgoing Student" Recipient
Honor Roll 2010-2011
May 2013

Professional Summary
Enthusiastic computer engineer eager to contribute to team success through hard work, attention to detail and excellent organizational skills.
Technical professional with complete understanding of entire software development life cycle. Respectful self-motivator gifted at finding reliable
solutions for software issues. Experienced in c#, python, HTML, SQL, node.js/javascript and working knowledge of Restful API design &
implementations. Fluent in English and Turkish and accustomed to working with cross-cultural, global teams. Skills
C#, HTML, CSS, JavaScript, 5 years of experience
SQL, 5 years of experience
Python, MatLab, MongoDB, Tableau, Node JS
Frameworks: .Net, Devexpress, TensorFlow, Keras, Scikit-learn, Pandas, NLTK.
Search Engine Optimization
Net
API
CSS
Clients
Database development
Designing
English
HTML
Image processing
JavaScript
Leadership
Marketing
MatLab
C#
Office
Windows
Project management
Speaker
Python
Sales
Search Engine Optimization
Spanish
SQL
System architecture
Turkish
User Interface
Web site
Written Experienced in c#, python, HTML, SQL, node.js/javascript and working knowledge of Restful API design &
implementations. Fluent in English and Turkish and accustomed to working with cross-cultural, global teams.
Skills
C#, HTML, CSS, JavaScript, 5 years of experience
SQL, 5 years of experience
Python, MatLab, MongoDB, Tableau, Node JS
Frameworks: .Net, Devexpress, TensorFlow, Keras, Scikit-learn, Pandas, NLTK.
Search Engine Optimization
Net
API
CSS
Clients
Database development
Designing
English
HTML
Image processing
JavaScript
Leadership
Marketing
MatLab
C#
Office
Windows
Project management
Speaker
Python
Sales
Search Engine Optimization
Spanish
SQL
System architecture
Turkish
User Interface
Web site
Written
Work History
Software Developer
 
, 
12/2015
 
to 
Current
 
Company Name
 
â€“ 
City
 
, 
State
 
Project was tasked to design an Internal Marketing System using c#,SQL, HTML,.net, devexpress.
Amazon,Walmart,eBay and Shopify.
API's were integrated.
The system records the end to end process.
A scalable system architecture to support high-availability Internet site with various internal applicationswas key.
My responsibilities were:.
Integration of marketplace APIs.
Coordinated with project management staff on database development timelines and project scope.
Revised, modularized and updated old code bases to modern development standards.
Authored code fixes and enhancements for inclusion in future code releases and patches.
The successful launch of these applications enabled company growth of 30%.
Computer Engineer Intern
 
, 
06/2013
 
to 
09/2013
 
Company Name
 
â€“ 
City
 
Tested the User Interface for Web site and provided feedback to increase user satisfaction, reduce reaction time and improve site
mappings.
Executed Performance testing on internal applications, documented results and maintained logs.
Computer Engineer Intern
 
, 
06/2012
 
to 
09/2012
 
Company Name
 
â€“ 
City
 
Our team has created an application for Turkish Patent and Trademark Office using MatLab, C#, SQL and HTML.
My responsibilities within that team was:.
Developed the FAQ and Support part of the web site.
Researched various studies and recommended alternate algorithms for Image processing to increase accuracy and speed.
Tested and documented prototypes and standard products.
Maintained positive work atmosphere by building relationships with co-workers, customers, clients and management.
PROJECTS Question Answering System: As the sole resource on this project created a windows application with Phyton for COVID-19
related questions using BERT and ELMO language models.
The Frameworks were Pandas, Tensorflow, NLTK.
Data Visualization Tool: The objective of this project was to analyze communication structures, employee relationships and suspicious
transactions by designing and implementing a data visualization tool.
The dataset included the VAST Challenge 2019 dataset with over 100,000+ data.The Frameworks were Flask, Pandas and the Language was Phyton.
My responsibilities were data cleansing and data categorization.
From the data visualization Leadership was able to get insight in the growth, sales, costs and make decisions towards better performance.
Pre-Assess Melanoma: The objective of the project was to create a system that could pre-assess malignant melanoma using Matlab
Language and image processing algorithms.
The system was trained with the images of malignant and healthy images.
My responsibilities included all development, testing and training.
The User Interface was the responsibility of other team member.
This system enabled the users to seek healthcare at earlier stages of melanoma. Education
IN
 
: 
Expected in 
06/2021
 
Rowan University
 
- 
City
 
, 
State
 
Bachelor of Science
 
: 
Computer Engineering
 
, 
06/2015
 
Selcuk University
 
- 
City
 
Master of Science
 
State
 
Work History
Software Developer
 
, 
12/2015
 
to 
Current
 
Company Name
 
â€“ 
City
 
, 
State
 
Project was tasked to design an Internal Marketing System using c#,SQL, HTML,.net, devexpress.
Amazon,Walmart,eBay and Shopify.
API's were integrated.
The system records the end to end process.
A scalable system architecture to support high-availability Internet site with various internal applications was key.
My responsibilities were:.
Integration of marketplace APIs.
Coordinated with project management staff on database development timelines and project scope.
Revised, modularized and updated old code bases to modern development standards.
Authored code fixes and enhancements for inclusion in future code releases and patches.
The successful launch of these applications enabled company growth of 30%.
Computer Engineer Intern
 
, 
06/2013
 
to 
09/2013
 
Company Name
 
â€“ 
City
 
Tested the User Interface for Web site and provided feedback to increase user satisfaction, reduce reaction time and improve site
mappings.
Executed Performance testing on internal applications, documented results and maintained logs.
Computer Engineer Intern
 
, 
06/2012
 
to 
09/2012
 
Company Name
 
â€“ 
City
 
Our team has created an application for Turkish Patent and Trademark Office using MatLab, C#, SQL and HTML.
My responsibilities within that team was:.
Developed the FAQ and Support part of the web site.
Researched various studies and recommended alternate algorithms for Image processing to increase accuracy and speed.
Tested and documented prototypes and standard products.
Maintained positive work atmosphere by building relationships with co-workers, customers, clients and management.
PROJECTS Question Answering System: As the sole resource on this project created a windows application with Phyton for COVID-19
related questions using BERT and ELMO language models.
The Frameworks were Pandas, Tensorflow, NLTK.
Data Visualization Tool: The objective of this project was to analyze communication structures, employee relationships and suspicious
transactions by designing and implementing a data visualization tool.
The dataset included the VAST Challenge 2019 dataset with over 100,000+ data.
The Frameworks were Flask, Pandas and the Language was Phyton.
My responsibilities were data cleansing and data categorization.
From the data visualization Leadership was able to get insight in the growth, sales, costs and make decisions towards better performance.
Pre-Assess Melanoma: The objective of the project was to create a system that could pre-assess malignant melanoma using Matlab
Language and image processing algorithms.
The system was trained with the images of malignant and healthy images.
My responsibilities included all development, testing and training.
The User Interface was the responsibility of other team member.
This system enabled the users to seek healthcare at earlier stages of melanoma.
Publications
English for Science, Technology, Engineering, and Mathematics University of Virginia Darden School of Business Getting Started: Agile MeetsDesign Thinking University of Pennsylvania Biology Meets Programming: Bioinformatics for Beginners Coursera Course Certificates
Languages
Fluent in written and spoken English
Spanish B1 level
Native speaker of Turkish
Skills
C#, HTML, CSS, JavaScript, 5 years of experience
SQL, 5 years of experience
Python, MatLab, MongoDB, Tableau, Node JS
Frameworks: .Net, Devexpress, TensorFlow, Keras, Scikit-learn, Pandas, NLTK.
Search Engine Optimization,
Net, API, CSS, clients, database development, designing, English, HTML, Image processing, JavaScript, Leadership, Marketing, MatLab, C#,
Office, windows, project management, speaker, Python, sales, Search Engine Optimization, Spanish, SQL, system architecture, Turkish, User
Interface, Web site, written

 Skills
3D, 3D Studio Max, Adobe, Photoshop, Architectural Drafting, AutoCAD, Basic, book, CAD, Conversion, Crystal Reports, Customer Service,
data dictionary, databases, database, drafting, Edit, electrical engineering, engineer, xml, features, Fiber Optics, GIS, GPS, HPS, Illustrator,
InDesign, Java, Lighting, Mapper 6, MS Access, C#, Microsoft Office, Office, MS Project, 3.1, Windows XP, MicroStation, enterprise,
NetWORKS 4.7.1, next, Operating Systems, DB, coding, research, Revit, supervisor, phone, transformers, transformer, Vista, Wirin Experience
Company Name
 
Engineering Technician
 
07/2014
 
to 
Current
Updating the map book settings to publish water tiles of atlas map Generating system valve and hydrant data per tile using Crystal Reports
Creating Tree GIS database - subtypes, domains, required fields with data types, geo-coding about 38,000 tree addresses Analyzing DB
and generating reports per engineer requests Digitizing of Fiber Optics and tube allocation using Telvent fiber manager Creating custom
maps for research studies Updating GIS, CAD, and other records based on asbuilts received Cover the front counter answering phone calls
and customer walk-ins (when necessary) AutoCAD drafting (street lighting, substations, electrical substructure and wiring, and customer
primary services) Perform various queries for street lighting, electrical engineering, and water engineering departments Updated GWP
Electric and Water 5 Year CIP Plan Cross training with Electrical Planners - New and updated meter spots Created address geocode and
updated xml to set as default Review, reconcile, and post GIS edits to parent version while supervisor is on vacation Linked transformers to
customer meter data for transformer load study.
Company Name
 
Assistant Engineering Technician
 
11/2013
 
to 
07/2014
Maintained the ArcFM stored display for the Electric viewers and editors Edit of electrical features in a versioned enterprise database Error
check and tracing for Phase and Feeder connectivity Consolidated five street light databases into one (GPS, Versioned GIS, AutoCAD/MS
Access, Disconnected, and Duplicated) Created custom street light maps for the Street light HPS Conversion Master Plan Updated
Customer Service, Substructure, Substation, Street Lighting and Wiring drawings Conversion of paper and scanned images to AutoCAD
format Researched new City Fiber routes using GIS Reviewed and fixed any errors for cartographic representation Worked as a team for a
two phase Service Point ID mapping for electrical services to the customer side Helped Engineering staff with any GIS related issues
Checked off permits for new constructions Updated wall maps at the Utility Operation Center and Howard Substation Updated and plotted
4kV/12kV feeder conversion map for the next three years Worked with ISD-IT to publish web maps.
Company Name
 
Hourly City Worker
 
12/2007
 
to 
09/2012
Approved Electrical and Fiber Optic edits to parent version Updates to the GIS manual for work procedures Trained co-workers to use
GIS / ArcFM Created data dictionary for GPS collection project of the Street Lights Prepared maps and reports for extension of Fiber
Optics Reviewed Plan Checks for GWP Fiber Optics Plotted sectional and primary maps from CAD and GIS Installation of ArcGIS and
ArcFM Created and maintained personal geodatabases Assisted in creation of mapping standards and other procedures Updated the 5
Year CIP Plan Mapped/Updated electrical assets within GIS and CADD: Electric Vehicle (EV), Service Drops, Tropos, Ice Bears,
Underground/Overhead Distribution lines, Vaults, Pull Boxes, Basemap, Poles, Pedestals Assisted in obtaining bearing and distances for
electrical easement deed Digitized electrical and water easements Updated Glendale's Municipal Code Book. Education
Bachelor of Science
 
:
 
Geography Social Sciences
 
June, 2012
 
California Polytechnic State University Pomona GIS
 
GPA: 
Pi Gamma Mu
Geography Pi Gamma Mu Social Sciences
Associates Degree
 
:
 
Architecture
 
June, 2009
 
Glendale Community College
 
Architecture
Civil Design Technology
 
December, 2012
 
Rio Hondo Community College
 
Civil Design Technology
GCC Certificate, Architectural CAD GCC Certificate, Architectural Drafting
 
GPA: 
President and Vice President of the American Society of
Engineers and Architects Club (ASEA) 
Dean's list
 
President and Vice President of the American Society of Engineers and Architects Club(ASEA) 
Dean's list
Work History
Company Name
Professional Affiliations
ESRI User Conference, Attended Seminars/Workshops (San Diego, CA) Basic Data Collection using TerraSync v4.0x Field Software GPS Data
Collection for GIS - Best Practice 
ESRI Certificate, Cartographic Design Using ArcGIS 9
Additional Information
2 | Page
Skills
3D, 3D Studio Max, Adobe, Photoshop, Architectural Drafting, AutoCAD, Basic, book, CAD, Conversion, Crystal Reports, Customer Service,
data dictionary, databases, database, drafting, Edit, electrical engineering, engineer, xml, features, Fiber Optics, GIS, GPS, HPS, Illustrator,
InDesign, Java, Lighting, Mapper 6, MS Access, C#, Microsoft Office, Office, MS Project, 3.1, Windows XP, MicroStation, enterprise,
NetWORKS 4.7.1, next, Operating Systems, DB, coding, research, Revit, supervisor, phone, transformers, transformer, Vista, Wiring


**Sparse Search Results:**

TypeError: TextInputSequence must be str