In [1]:
import numpy as np
import pandas as pd
import pickle
import torch
import re
import spacy
import os

from tqdm.auto import tqdm, trange

from utils import embedding, get_embedding, text_cleaning, find_top_n , tfidf_sentences
from utils import production_labeling, material_labeling, feature_labeling 

from flair.embeddings import WordEmbeddings, FlairEmbeddings, DocumentPoolEmbeddings
from flair.data import Sentence
from sklearn.feature_extraction.text import TfidfVectorizer

In [2]:
# Load the small English spaCy pipeline; later cells hand it to text_cleaning.
nlp = spacy.load(name="en_core_web_sm")

In [3]:
# Collect one title per ".txt" file in the corpus directory.
# os.path.splitext strips only the final extension, so a file name that itself
# contains periods (e.g. "A vs. B.txt") keeps its full title instead of being
# cut at the first ".".
# NOTE: entries follow os.listdir order; the cell that reads the file contents
# walks the same listing, so titles and texts are expected to line up by index.
title_list = [
    os.path.splitext(file)[0]
    for file in os.listdir("Local pdf text files")
    if file.endswith(".txt")
]

In [4]:
# Read the raw contents of every ".txt" file in the corpus directory (UTF-8),
# in the order os.listdir returns them.
pdf_list = []

for file in filter(lambda name: name.endswith(".txt"), os.listdir("Local pdf text files")):
    with open("Local pdf text files/" + file, "r", encoding="utf8") as f:
        pdf_list.append(f.read())

In [5]:
# Run every raw document through text_cleaning with the shared spaCy pipeline.
pdfs = [text_cleaning(raw_text, nlp) for raw_text in pdf_list]

In [6]:
# Build one space-separated string of token lemmas per cleaned document.
pdf_list_lemma = [
    ' '.join(token.lemma_ for token in text)
    for text in pdfs
]

In [7]:
# Fit a TF-IDF vocabulary on the lemmatised documents. English stop words are
# removed; max_df / min_df bound the document frequency of retained terms.
vectorizer = TfidfVectorizer(
    stop_words='english',
    min_df=2,
    max_df=0.9,
)
# Kept as the last expression so the fitted estimator is displayed by the cell.
vectorizer.fit(pdf_list_lemma)

TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',
                dtype=<class 'numpy.float64'>, encoding='utf-8',
                input='content', lowercase=True, max_df=0.9, max_features=None,
                min_df=2, ngram_range=(1, 1), norm='l2', preprocessor=None,
                smooth_idf=True, stop_words='english', strip_accents=None,
                sublinear_tf=False, token_pattern='(?u)\\b\\w\\w+\\b',
                tokenizer=None, use_idf=True, vocabulary=None)

In [8]:
# Cosine-similarity module shared by the labeling helpers below.
# dim=0 reduces along the first axis of the inputs; eps guards the division.
cos = torch.nn.CosineSimilarity(
    eps=1e-6,
    dim=0,
)

In [9]:
# Reference embeddings for the production-process phrases.
# label_1..label_2 are the laser-based processes, label_3..label_5 the
# extrusion-based ones; all five are passed to production_labeling.
label_1, label_2 = map(get_embedding, (
    'selective laser melting',
    'direct metal laser sintering',
))

label_3, label_4, label_5 = map(get_embedding, (
    'fused deposition modeling',
    'fused filament fabrication',
    'extrusion based additive manufacturing',
))

# Reference embeddings for the material classes (passed to material_labeling).
metal, ceramic, polymer = map(get_embedding, ('metal', 'ceramic', 'polymer'))

# Reference embeddings for mechanical-property phrases.
# NOTE(review): these are not referenced by any later cell visible here.
feature_1, feature_2, feature_3, feature_4, feature_5, feature_6 = map(get_embedding, (
    'fracture toughness',
    'tensile strength',
    'yield strength',
    'elastic modulus',
    'strain fracture break',
    'weibull modulus',
))



In [10]:
# Derive the text used for labeling from the lemmatised documents and the
# fitted vectorizer.
# NOTE(review): the literal 3 is presumably a "top n" setting of
# tfidf_sentences -- confirm against utils.
sentences = tfidf_sentences(pdf_list_lemma, vectorizer, 3)

# Label each document with a production process and a material class, together
# with the score returned by the respective helper.
production_refs = (label_1, label_2, label_3, label_4, label_5)
material_refs = (metal, ceramic, polymer)

production, production_cos_score = production_labeling(sentences, cos, *production_refs)
material, material_cos_score = material_labeling(sentences, cos, *material_refs)

HBox(children=(IntProgress(value=0, max=99), HTML(value='')))




HBox(children=(IntProgress(value=0, max=99), HTML(value='')))




HBox(children=(IntProgress(value=0, max=99), HTML(value='')))




HBox(children=(IntProgress(value=0, max=99), HTML(value='')))




In [11]:
# Assemble the per-document results into one table; every list is expected to
# hold one entry per document, in the same order.
d = {
    'Titles': title_list,
    'Abstracts': pdf_list,
    'Production': production,
    'P_score': production_cos_score,
    'Material': material,
    'M_score': material_cos_score,
}
df = pd.DataFrame(data=d)

In [12]:
# Combined "<production> / <material>" label for each document.
production_with_separator = df["Production"] + " / "
df["Production/Material"] = production_with_separator + df["Material"]

In [13]:
# Preview the first 20 labelled documents.
df.iloc[:20]

Unnamed: 0,Titles,Abstracts,Production,P_score,Material,M_score,Production/Material
0,3D gel-printing of zirconia ceramic parts,3D gel-printing (3DGP) is a new printing metho...,SLM or DMLS,0.4,Ceramic,0.33,SLM or DMLS / Ceramic
1,3D Printed Glass Surface Finish and Bulk Prope...,It is impossible to print glass directly from ...,SLM or DMLS,0.66,Metal,0.54,SLM or DMLS / Metal
2,3D printing of ceramics A review,Along with extensive research on the three-dim...,SLM or DMLS,0.65,Ceramic,0.65,SLM or DMLS / Ceramic
3,3D Printing of Continuous-Fiber Composites by ...,We have developed a method for the three-dimen...,SLM or DMLS,0.59,Polymer,0.6,SLM or DMLS / Polymer
4,3D Printing of Transparent Glass,Traditional assembly line manufacturing is spe...,SLM or DMLS,0.62,Metal,0.51,SLM or DMLS / Metal
5,A Review of Additive Manufacturing,Additive manufacturing processes take the info...,FDM or FFF or EAM,0.8,Metal,0.38,FDM or FFF or EAM / Metal
6,Additive manufacturing and its societal impact...,"Thirty years into its development, additive ma...",FDM or FFF or EAM,0.74,Polymer,0.39,FDM or FFF or EAM / Polymer
7,Additive manufacturing and mechanical characte...,Mechanical properties of additively manufactur...,FDM or FFF or EAM,0.58,Polymer,0.48,FDM or FFF or EAM / Polymer
8,Additive manufacturing of carbonfiber-reinforc...,Carbon fiber-reinforced plastic composites hav...,SLM or DMLS,0.67,Metal,0.63,SLM or DMLS / Metal
9,Additive Manufacturing of Ceramic Based Materials,This paper offers a review of present achievem...,SLM or DMLS,0.54,Metal,0.45,SLM or DMLS / Metal


In [14]:
# Number of documents assigned to each production label.
df.loc[:, "Production"].value_counts()

SLM or DMLS          67
FDM or FFF or EAM    32
Name: Production, dtype: int64

In [15]:
# Number of documents assigned to each material label.
df.loc[:, "Material"].value_counts()

Metal      39
Polymer    39
Ceramic    21
Name: Material, dtype: int64

In [16]:
# Persist the labelled table (index included) to an Excel workbook in the
# current working directory.
df.to_excel(excel_writer='results.xlsx')