In [1]:
import pandas as pd
# Show full cell text without truncation. `None` is the supported way to
# disable the column-width limit; the legacy `-1` value is deprecated and
# rejected by current pandas releases.
pd.set_option('display.max_colwidth', None)

In [2]:
# Load the raw texts: the file has no header row, so the single column is
# named explicitly.
# NOTE(review): assumes every row parses as exactly one CSV field — confirm
# that texts containing commas are quoted in the file.
data = pd.read_csv(
    'pristavki.csv',
    names=['text'],
    header=None,
)

In [3]:
from yargy import Parser, rule, or_
from yargy.predicates import in_, in_caseless, custom
from yargy.tokenizer import MorphTokenizer
from yargy.pipelines import morph_pipeline, caseless_pipeline
from yargy.interpretation import fact
from IPython.display import display

In [4]:
# Fact schema for an extracted game mention:
#   franchise   - the matched series alias,
#   installment - the optional token that follows the alias.
game = fact('game', ['franchise', 'installment'])

In [5]:
import re

# One or more runs of Latin letters/digits, each optionally followed by
# whitespace (case-insensitive).
pattern = re.compile(r'([a-z\d]+\s*)+', re.IGNORECASE)


def _is_installment_token(value):
    """Return True if `value` contains at least one Latin letter or digit.

    The pattern is applied with `search`, so a match anywhere in the token
    value is enough. The regex is evaluated once per token and a plain
    boolean is returned, which is what a predicate is expected to produce.
    """
    return pattern.search(value) is not None


# Token-level predicate used for the part of a title after the franchise name.
installment = custom(_is_installment_token)

In [6]:
def _franchise_rule(aliases):
    """Build a rule for one game franchise.

    The rule matches any of the given `aliases` (stored in `game.franchise`),
    optionally followed by a single token accepted by the `installment`
    predicate (stored in `game.installment`).
    """
    return rule(
        morph_pipeline(aliases).interpretation(game.franchise),
        installment.interpretation(game.installment).optional()
    )


# 'mortal kombat' is the franchise's canonical spelling; the 'combat'
# variant is kept because it occurs in the data as a common misspelling.
MC = _franchise_rule(['mortal kombat', 'mortal combat', 'мортал комбат', 'mc', 'мк'])
NFS = _franchise_rule(['need for speed', 'нид фор спид', 'нид фо спид', 'nfs', 'нфс'])
BF = _franchise_rule(['battlefield', 'battle field'])
COD = _franchise_rule(['call of duty', 'cod'])
DS = _franchise_rule(['dark souls', 'demon souls', 'demon\'s souls',
                      'bloodborne', 'bloodborn'])

# Any of the franchise rules yields a `game` fact.
GAMES = or_(MC, NFS, BF, COD, DS).interpretation(game)
parser = Parser(GAMES)

In [7]:
# Collect the interpreted fact of every match found in every text, in order.
matches = [
    found.fact
    for text in data.text
    for found in parser.findall(text)
]

In [8]:
# Total number of facts extracted across all texts.
len(matches)

857

In [9]:
# Preview the first 25 facts: franchise, plus the installment when present.
for found in matches[:25]:
    fields = [found.franchise]
    if found.installment is not None:
        fields.append(found.installment)
    print(*fields)

Mortal combat vs
Call of Duty Ghosts
Call of Duty 4
Call of Duty MW
Battlefield 3
Need For Speed Shift
Battlefield 4
Battlefield
Battlefield
Call Of Duty
Call Of Duty
Need For Speed
Battlefield 4
мортал комбат
nfs
Battlefield 4
Nfs 2016
Battlefield 4
Call of Duty
Bloodborn
Call of Duty MW
Battlefield 3
dark souls 3
Demon's Souls
Need For Speed Most


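Вывод: код работает, однако в поле installment всегда записывается только один токен (например, в выводе выше «Need For Speed Most», где название, судя по всему, обрезано). Причина, по-видимому, в том, что предикат, созданный через custom, применяется к каждому токену по отдельности, поэтому регулярное выражение, рассчитанное на несколько слов, никогда не получает больше одного. Не очень понятно, как можно обойти это в yargy: документация по поводу функции custom весьма скудная. Возможный вариант, который стоит проверить, — сделать предикат повторяемым с помощью `.repeatable()`.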