In [2]:
!pip install spacy



In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!python -m spacy info

[1m

spaCy version    3.7.5                         
Location         /usr/local/lib/python3.10/dist-packages/spacy
Platform         Linux-6.1.85+-x86_64-with-glibc2.35
Python version   3.10.12                       
Pipelines        en_core_web_sm (3.7.1)        



In [4]:
 import spacy
 from spacy.tokens import DocBin
 from tqdm import tqdm

 nlp = spacy.blank("en") # load a new spacy model
 db = DocBin() # create a DocBin object

In [5]:
import json
f = open('/content/annotations.json')
TRAIN_DATA = json.load(f)

In [6]:
TRAIN_DATA

{'classes': ['ANXIETY_POSITIVE',
  'SCHIZOPHRENIA_POSITIVE',
  'DEPRESSION_POSITIVE',
  'MANIA_POSITIVE',
  'BIPOLARDISORDER_POSITIVE',
  'STRESS_POSITIVE',
  'ADDICTION_POSITIVE'],
 'annotations': [["Yes, I've been feeling constantly nervous and worried about a lot of things, even Worry about small issues\r",
   {'entities': [[23, 53, 'ANXIETY_POSITIVE'],
     [82, 106, 'ANXIETY_POSITIVE']]}],
  ["I feel anxious almost every day, even when there's no clear reason for it.\r",
   {'entities': [[7, 31, 'ANXIETY_POSITIVE'], [51, 66, 'ANXIETY_POSITIVE']]}],
  ["Honestly, it's been a rollercoaster of emotions.\r",
   {'entities': [[22, 47, 'ANXIETY_POSITIVE']]}],
  ["I've felt this undercurrent of worry for weeks now.\r",
   {'entities': [[15, 36, 'ANXIETY_POSITIVE']]}],
  ["It's like my mind racing all the time, jumping from one thing to the next.\r",
   {'entities': [[13, 24, 'ANXIETY_POSITIVE']]}],
  ['Simple tasks that used to be easy feel overwhelmed by simple tasks.\r',
   {'entities'

In [9]:
# for text, annot in tqdm(TRAIN_DATA['annotations']):
#   doc = nlp.make_doc(text)
#   ents = []
#   for start, end, label in annot["entities"]:
#     span = doc.char_span(start, end, label=label, alignment_mode="contract")
#     if span is None:
#       print("Skipping entity")
#     else:
#       ents.append(span)
#   doc.ents = ents
#   db.add(doc)

#   db.to_disk("/content/drive/MyDrive/aromatic/updated/training_data.spacy")

from tqdm import tqdm

for item in tqdm(TRAIN_DATA['annotations']):
    if item is None:
        print("Skipping None item in annotations")
        continue

    if not isinstance(item, (tuple, list)) or len(item) != 2:
        print(f"Skipping incorrectly formatted item: {item}")
        continue

    text, annot = item
    doc = nlp.make_doc(text)
    ents = []

    for start, end, label in annot.get("entities", []):
        span = doc.char_span(start, end, label=label, alignment_mode="contract")
        if span is None:
            print(f"Skipping entity in text: {text}")
        else:
            ents.append(span)

    doc.ents = ents
    db.add(doc)

db.to_disk("/content/drive/MyDrive/aromatic/updated/training_data.spacy")


100%|██████████| 631/631 [00:00<00:00, 3191.36it/s]


Skipping None item in annotations
Skipping None item in annotations
Skipping None item in annotations
Skipping None item in annotations
Skipping None item in annotations


In [10]:
! python -m spacy init config "/content/drive/MyDrive/aromatic/updated/config.cfg" --lang en --pipeline ner --optimize efficiency

[38;5;3m⚠ To generate a more effective transformer-based config (GPU-only),
install the spacy-transformers package and re-run this command. The config
generated now does not use transformers.[0m
[38;5;4mℹ Generated config template specific for your use case[0m
- Language: en
- Pipeline: ner
- Optimize for: efficiency
- Hardware: CPU
- Transformer: None
[38;5;2m✔ Auto-filled config with all values[0m
[38;5;2m✔ Saved config[0m
/content/drive/MyDrive/aromatic/updated/config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [12]:
!python -m spacy train "/content/drive/MyDrive/aromatic/updated/config.cfg" --output ./ --paths.train="/content/drive/MyDrive/aromatic/updated/training_data.spacy" --paths.dev="/content/drive/MyDrive/aromatic/updated/training_data.spacy"

[38;5;4mℹ Saving to output directory: .[0m
[38;5;4mℹ Using CPU[0m
[1m
[38;5;2m✔ Initialized pipeline[0m
[1m
[38;5;4mℹ Pipeline: ['tok2vec', 'ner'][0m
[38;5;4mℹ Initial learn rate: 0.001[0m
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     55.77    0.00    0.00    0.00    0.00
  2     200        253.17   4486.57    4.83   11.49    3.06    0.05
  5     400        697.34   4549.40   23.04   27.56   19.80    0.23
  8     600       1818.17   5494.82   42.39   45.35   39.80    0.42
 12     800       3251.05   6040.10   61.21   65.24   57.65    0.61
 18    1000       4644.89   5788.56   69.61   70.04   69.18    0.70
 24    1200       6281.57   5978.05   77.92   80.15   75.82    0.78
 32    1400       6495.43   5818.06   84.40   84.32   84.49    0.84
 42    1600       9112.93   6380.54   85.47   85.12   85.82    0.85
 53    1800       9918.55   6100.87   87.67   87.58

In [13]:
import shutil
import zipfile
import os

# Source folder path to be zipped
source_folder_path = '/content/model-last'

#/content/drive/MyDrive/Colab Notebooks/smartDiary/
# Destination zip file path
destination_zip_path = '/content/drive/MyDrive/aromatic/updated/model-last.zip'

# Zip the source folder
def zip_folder(folder_path, output_path):
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                zipf.write(file_path, os.path.relpath(file_path, folder_path))

# Zip the folder
zip_folder(source_folder_path, destination_zip_path)

# Verify if the zip file has been created successfully
if os.path.exists(destination_zip_path):
    print(f'The folder has been successfully zipped and saved to: {destination_zip_path}')
else:
    print('Failed to zip the folder.')


The folder has been successfully zipped and saved to: /content/drive/MyDrive/aromatic/updated/model-last.zip


In [14]:
# Source folder path to be zipped
source_folder_path = '/content/model-best'

# Destination zip file path
destination_zip_path = '/content/drive/MyDrive/aromatic/updated/model-best.zip'

# Zip the source folder
def zip_folder(folder_path, output_path):
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                zipf.write(file_path, os.path.relpath(file_path, folder_path))

# Zip the folder
zip_folder(source_folder_path, destination_zip_path)

# Verify if the zip file has been created successfully
if os.path.exists(destination_zip_path):
    print(f'The folder has been successfully zipped and saved to: {destination_zip_path}')
else:
    print('Failed to zip the folder.')

The folder has been successfully zipped and saved to: /content/drive/MyDrive/aromatic/updated/model-best.zip


In [15]:
nlp_ner = spacy.load("/content/model-best")

In [16]:
doc = nlp_ner(''' Yes, I've been feeling constantly nervous and worried about a lot of things, even Worry about small issues''')

In [17]:
spacy.displacy.render(doc, style="ent", jupyter=True)

In [18]:
import zipfile
import os

def zip_folder(folder_path, output_path):
    with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                zipf.write(file_path, os.path.relpath(file_path, folder_path))

# Specify the folder path and output path
folder_path = '/content/drive/MyDrive/aromatic/updated/'  # Replace with your folder path
output_path = '/content/drive/MyDrive/aromatic/updated.zip'  # Replace with your desired output path

# Zip the folder
zip_folder(folder_path, output_path)