### Object Tracking with YOLOv11

---

#### @Step 1: Installing the Necessary Libraries

In [4]:
# Uncomment to install the dependencies used by this notebook.
# %pip (rather than !pip) installs into the environment of the running kernel.
# %pip install opencv-python ultralytics

#### @Step 2: Importing the Libraries

In [1]:
import cv2
from ultralytics import YOLO

#### @Step 3: Testing the Pretrained Model on a Sample Image

In [3]:
# Smoke test through the Ultralytics command line: run the yolo11m.pt weights
# in predict mode on a sample image. This is inference only; nothing is trained.
!yolo task=detect mode=predict model=yolo11m.pt source="https://ultralytics.com/images/bus.jpg"

Ultralytics 8.3.186  Python-3.11.0 torch-2.2.2+cpu CPU (Intel Core(TM) i3-3120M 2.50GHz)
YOLO11m summary (fused): 125 layers, 20,091,712 parameters, 0 gradients, 68.0 GFLOPs

Found https://ultralytics.com/images/bus.jpg locally at bus.jpg
image 1/1 c:\Doc_Python\174-@-e2enet-YOLOv11 Object Tracking\bus.jpg: 640x480 4 persons, 1 bus, 2509.5ms
Speed: 16.5ms preprocess, 2509.5ms inference, 34.7ms postprocess per image at shape (1, 3, 640, 480)
Results saved to [1mruns\detect\predict2[0m
 Learn more at https://docs.ultralytics.com/modes/predict


#### @Step 4: Choosing the Model

In [5]:
# Load the detector from the yolo11m.pt weights file; the video-processing
# cell further down passes this `model` to predict_and_detect().
model = YOLO("yolo11m.pt")

#### @Step 5: Creating a VideoWriter to Save the Processed Video

In [6]:
def create_video_writer(video_cap, output_filename, fallback_fps=30.0):
    """Create a ``cv2.VideoWriter`` matching the frame size and rate of a capture.

    Args:
        video_cap: A ``cv2.VideoCapture`` (or any object with a compatible
            ``get(prop_id)`` method) to read width, height and FPS from.
        output_filename: Path of the video file to write.
        fallback_fps: Frame rate to use when the capture does not report a
            usable one (zero, negative or NaN).

    Returns:
        The ``cv2.VideoWriter`` created for ``output_filename``. The caller is
        responsible for calling ``release()`` on it.
    """
    frame_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Keep the frame rate as a float: truncating e.g. 29.97 to 29 makes the
    # written video play slower than the source.
    fps = float(video_cap.get(cv2.CAP_PROP_FPS))
    if not fps > 0:  # written this way so NaN also takes the fallback
        fps = float(fallback_fps)

    # FourCC (Four Character Code) selecting the codec. The lower-case tag is
    # the one the MP4 container expects; the upper-case 'MP4V' spelling makes
    # OpenCV's FFmpeg backend warn and fall back to 'mp4v' anyway.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    return cv2.VideoWriter(output_filename, fourcc, fps,
                           (frame_width, frame_height))

In [7]:
def predict(chosen_model, img, classes=None, conf=0.5):
    """Run ``chosen_model.predict`` on ``img`` with an optional class filter.

    Args:
        chosen_model: Object exposing ``predict(img, **kwargs)``.
        img: Input forwarded unchanged to ``chosen_model.predict``.
        classes: Class ids to restrict the prediction to. ``None`` or an empty
            sequence means no filter: the ``classes`` argument is then not
            passed to the model at all.
        conf: Confidence threshold forwarded as ``conf``.

    Returns:
        Whatever ``chosen_model.predict`` returns.
    """
    predict_kwargs = {"conf": conf}
    # Only forward the filter when one was actually requested.
    if classes:
        predict_kwargs["classes"] = classes
    return chosen_model.predict(img, **predict_kwargs)

In [8]:
def predict_and_detect(chosen_model, img, classes=None, conf=0.5, rectangle_thickness=2, text_thickness=1):
    """Run detection on ``img`` and draw each box and class name onto it.

    ``img`` is annotated in place and also returned, so pass a copy if the
    untouched frame is still needed.

    Args:
        chosen_model: Model handed to ``predict``.
        img: Image to run detection on and draw onto.
        classes: Optional class ids to restrict detection to; ``None`` or an
            empty sequence means no filter.
        conf: Confidence threshold forwarded to ``predict``.
        rectangle_thickness: Line thickness of the bounding boxes.
        text_thickness: Stroke thickness of the class labels.

    Returns:
        A ``(img, results)`` tuple: the same image object, now annotated, and
        the results returned by ``predict``.
    """
    draw_color = (255, 0, 0)  # shared by the box outline and its label
    label_gap = 10  # pixels between the box's top edge and the label baseline
    font = cv2.FONT_HERSHEY_PLAIN
    font_scale = 1

    results = predict(chosen_model, img, classes, conf=conf)
    for result in results:
        for box in result.boxes:
            # Convert the four corner coordinates once instead of re-indexing
            # box.xyxy[0] for every use.
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
            label = f"{result.names[int(box.cls[0])]}"

            # Default position is just above the box. When the box touches the
            # top of the frame that would put the text out of view, so place
            # the label just inside the box instead.
            (_, text_height), _ = cv2.getTextSize(label, font, font_scale, text_thickness)
            label_y = y1 - label_gap
            if label_y - text_height < 0:
                label_y = y1 + text_height + label_gap

            cv2.rectangle(img, (x1, y1), (x2, y2), draw_color, rectangle_thickness)
            cv2.putText(img, label, (x1, label_y), font, font_scale,
                        draw_color, text_thickness)
    return img, results

#### @Step 6: Detecting Objects in Videos with YOLOv11

In [None]:
'''This code processes an entire video, applying object detection to each frame, saving the result into a new video, 
and displaying the processed frames in real time.'''

In [11]:
output_filename = "Your_1_traffic.mp4"
video_path = r"1_traffic.mp4"

cap = cv2.VideoCapture(video_path)
# Fail loudly when the input cannot be opened; otherwise cap.read() fails on
# the first iteration and the cell ends without producing a usable video.
if not cap.isOpened():
    raise FileNotFoundError(f"Could not open video: {video_path}")

writer = create_video_writer(cap, output_filename)
try:
    while True:
        success, img = cap.read()
        if not success:
            break  # end of stream or read error

        # Named variable instead of `_`, which IPython uses for the last output.
        result_img, detections = predict_and_detect(model, img, classes=[], conf=0.5)
        writer.write(result_img)
        cv2.imshow("Image", result_img)

        # Press 'q' in the preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture, finalize the output file and close the preview
    # window even if the loop is interrupted or a frame raises.
    cap.release()
    writer.release()
    cv2.destroyAllWindows()




0: 384x640 1 bicycle, 7 cars, 1 truck, 2793.3ms
Speed: 8.0ms preprocess, 2793.3ms inference, 5.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 bicycle, 7 cars, 1 truck, 2217.1ms
Speed: 89.2ms preprocess, 2217.1ms inference, 5.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 bicycle, 8 cars, 1766.4ms
Speed: 4.9ms preprocess, 1766.4ms inference, 5.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 bicycle, 7 cars, 1851.1ms
Speed: 6.7ms preprocess, 1851.1ms inference, 6.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 bicycle, 8 cars, 1 truck, 1831.3ms
Speed: 6.2ms preprocess, 1831.3ms inference, 3.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 bicycle, 8 cars, 2393.2ms
Speed: 4.9ms preprocess, 2393.2ms inference, 9.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 bicycle, 8 cars, 1 truck, 2081.6ms
Speed: 6.2ms preprocess, 2081.6ms inference, 6.0ms postprocess per image at shape (1, 3, 3

#### @Detecting Objects in a Single Image with YOLOv11

In [1]:
from ultralytics import YOLO
import cv2
def predict(chosen_model, img, classes=None, conf=0.5):
    """Run ``chosen_model.predict`` on ``img`` with an optional class filter.

    Args:
        chosen_model: Object exposing ``predict(img, **kwargs)``.
        img: Input forwarded unchanged to ``chosen_model.predict``.
        classes: Class ids to restrict the prediction to. ``None`` or an empty
            sequence means no filter: the ``classes`` argument is then not
            passed to the model at all.
        conf: Confidence threshold forwarded as ``conf``.

    Returns:
        Whatever ``chosen_model.predict`` returns.
    """
    predict_kwargs = {"conf": conf}
    # Only forward the filter when one was actually requested.
    if classes:
        predict_kwargs["classes"] = classes
    return chosen_model.predict(img, **predict_kwargs)
def predict_and_detect(chosen_model, img, classes=None, conf=0.5, rectangle_thickness=2, text_thickness=1):
    """Run detection on ``img`` and draw each box and class name onto it.

    ``img`` is annotated in place and also returned, so pass a copy if the
    untouched image is still needed.

    Args:
        chosen_model: Model handed to ``predict``.
        img: Image to run detection on and draw onto.
        classes: Optional class ids to restrict detection to; ``None`` or an
            empty sequence means no filter.
        conf: Confidence threshold forwarded to ``predict``.
        rectangle_thickness: Line thickness of the bounding boxes.
        text_thickness: Stroke thickness of the class labels.

    Returns:
        A ``(img, results)`` tuple: the same image object, now annotated, and
        the results returned by ``predict``.
    """
    draw_color = (255, 0, 0)  # shared by the box outline and its label
    label_gap = 10  # pixels between the box's top edge and the label baseline
    font = cv2.FONT_HERSHEY_PLAIN
    font_scale = 1

    results = predict(chosen_model, img, classes, conf=conf)
    for result in results:
        for box in result.boxes:
            # Convert the four corner coordinates once instead of re-indexing
            # box.xyxy[0] for every use.
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
            label = f"{result.names[int(box.cls[0])]}"

            # Default position is just above the box. When the box touches the
            # top of the image that would put the text out of view, so place
            # the label just inside the box instead.
            (_, text_height), _ = cv2.getTextSize(label, font, font_scale, text_thickness)
            label_y = y1 - label_gap
            if label_y - text_height < 0:
                label_y = y1 + text_height + label_gap

            cv2.rectangle(img, (x1, y1), (x2, y2), draw_color, rectangle_thickness)
            cv2.putText(img, label, (x1, label_y), font, font_scale,
                        draw_color, text_thickness)
    return img, results
# model = YOLO("/content/runs/detect/train/weights/best.pt")
model = YOLO("yolo11m.pt")

# read the image; cv2.imread returns None instead of raising when the file is
# missing or unreadable, so check it before running the detector
image = cv2.imread("test_auto.jpg")
if image is None:
    raise FileNotFoundError("Could not read image: test_auto.jpg")

# Named variable instead of `_`, which IPython uses for the last output.
result_img, detections = predict_and_detect(model, image, classes=[], conf=0.5)
cv2.imshow("Image", result_img)
cv2.imwrite("Your_test_auto.jpg", result_img)

# Block until a key is pressed in the preview window, then close it so the
# window does not linger after the cell finishes.
cv2.waitKey(0)
cv2.destroyAllWindows()


0: 448x640 4 persons, 5 cars, 3121.6ms
Speed: 50.8ms preprocess, 3121.6ms inference, 37.6ms postprocess per image at shape (1, 3, 448, 640)


-1